def _TestSkipReshapeQuantization(self, is_training):
  graph = ops.Graph()
  with graph.as_default():
    batch_size, height, width, depth = 5, 128, 128, 3
    input1 = array_ops.zeros((batch_size, height, width, depth))
    conv = conv2d(
        input1,
        32, [5, 5],
        stride=2,
        padding='SAME',
        weights_initializer=self._WeightInit(0.09),
        activation_fn=nn_ops.relu6,
        scope='test/test')
    reshape = array_ops.reshape(
        conv, (int(10), int(height / 2), int(width / 2), int(16)))

    # Insert a fake quant node after the reshape. We will check that one isn't
    # inserted before.
    array_ops.fake_quant_with_min_max_vars(reshape, -1, 1)

    quantize.Quantize(graph, is_training, weight_bits=8, activation_bits=8)

    # Ensure that there isn't a FakeQuant added before the reshape.
    self.assertFalse(
        'FakeQuantWithMinMaxVars' in [i.op.type for i in reshape.op.inputs])

  graph = ops.Graph()
  with graph.as_default():
    batch_size, height, width, depth = 5, 128, 128, 3
    input1 = array_ops.zeros((batch_size, height, width, depth))
    conv = conv2d(
        input1,
        32, [5, 5],
        stride=2,
        padding='SAME',
        weights_initializer=self._WeightInit(0.09),
        activation_fn=nn_ops.relu6,
        scope='test/test')
    reshape = array_ops.reshape(
        conv, (int(10), int(height / 2), int(width / 2), int(16)))

    # If no fake quant is added after the reshape, a FakeQuant should be added
    # before the reshape.
    quantize.Quantize(graph, is_training, weight_bits=8, activation_bits=8)

    # Ensure that there is a FakeQuant added before the reshape.
    self.assertTrue(
        'FakeQuantWithMinMaxVars' in [i.op.type for i in reshape.op.inputs])
def call(self, inputs):
  x = array_ops.fake_quant_with_min_max_vars(
      inputs, self.min_var, self.max_var)
  w_fq = array_ops.fake_quant_with_min_max_vars(
      self.w, self.min_var, self.max_var)
  x = math_ops.matmul(x, w_fq)
  x = array_ops.fake_quant_with_min_max_vars(
      x, self.min_var, self.max_var)
  return x
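# A minimal sketch of the kind of layer that could own the variables used by
# call() above, written against the public tf.quantization alias of the same
# op. The class name, shapes, and initial range here are assumptions for
# illustration, not part of the original code.
import tensorflow as tf


class FakeQuantDense(tf.Module):  # hypothetical wrapper, for illustration only

  def __init__(self, in_dim=4, units=4):
    self.w = tf.Variable(tf.random.normal([in_dim, units]), name='w')
    # Rank-0 range endpoints, as fake_quant_with_min_max_vars requires for
    # non-per-channel quantization.
    self.min_var = tf.Variable(-1.0, trainable=False, name='min')
    self.max_var = tf.Variable(1.0, trainable=False, name='max')

  def __call__(self, inputs):
    # Same structure as call() above: fake-quantize the activations, the
    # weights, and the matmul output with a shared range.
    x = tf.quantization.fake_quant_with_min_max_vars(
        inputs, self.min_var, self.max_var)
    w_fq = tf.quantization.fake_quant_with_min_max_vars(
        self.w, self.min_var, self.max_var)
    x = tf.matmul(x, w_fq)
    return tf.quantization.fake_quant_with_min_max_vars(
        x, self.min_var, self.max_var)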
def _FakeQuantWithMinMaxVars(inputs, min_var, max_var, per_channel, num_bits,
                             narrow_range):
  """Adds a fake quantization operation.

  Depending on the value of per_channel, this operation either does global
  quantization or per-channel quantization.  min_var and max_var should have
  corresponding shapes: [] (scalar) when per_channel == False and [d] when
  per_channel == True.

  Args:
    inputs: a tensor containing values to be quantized.
    min_var: a variable containing quantization range lower end(s).
    max_var: a variable containing quantization range upper end(s).
    per_channel: a boolean specifying whether to use per-channel quantization.
    num_bits: Number of bits to use for quantization, must be between 2 and 8.
    narrow_range: Whether to use the narrow quantization range
      [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1].

  Returns:
    a tensor containing quantized values.
  """
  if per_channel:
    assert len(min_var.get_shape()) == 1
    assert len(max_var.get_shape()) == 1
    return array_ops.fake_quant_with_min_max_vars_per_channel(
        inputs, min_var, max_var, num_bits=num_bits, narrow_range=narrow_range)
  else:
    assert min_var.get_shape() == []  # pylint: disable=g-explicit-bool-comparison
    assert max_var.get_shape() == []  # pylint: disable=g-explicit-bool-comparison
    return array_ops.fake_quant_with_min_max_vars(
        inputs, min_var, max_var, num_bits=num_bits, narrow_range=narrow_range)
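# Usage sketch for the helper above, covering both modes. The shapes and
# range values are illustrative assumptions. Note the rank-0 (scalar) range
# for global quantization versus the rank-1, depth-sized range for
# per-channel quantization, matching the asserts in the helper.
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import variables

inputs = array_ops.zeros((8, 16))

global_min = variables.Variable(0.0, trainable=False, name='min')
global_max = variables.Variable(6.0, trainable=False, name='max')
global_q = _FakeQuantWithMinMaxVars(
    inputs, global_min, global_max, per_channel=False, num_bits=8,
    narrow_range=False)

# Per-channel: one range endpoint per channel of the last dimension (d = 16).
channel_min = variables.Variable([0.0] * 16, trainable=False, name='mins')
channel_max = variables.Variable([6.0] * 16, trainable=False, name='maxs')
channel_q = _FakeQuantWithMinMaxVars(
    inputs, channel_min, channel_max, per_channel=True, num_bits=8,
    narrow_range=False)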
def test_invalid_inputs(self):
  inputs = constant_op.constant(
      value=[[1.0], [2.0], [4.0]], dtype=dtypes.float32)
  with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError),
                              "must be rank 0"):
    self.evaluate(
        array_ops.fake_quant_with_min_max_vars(
            inputs=inputs, min=0.0, max=[[1.0], [2.0], [4.0]]))
  with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError),
                              "must be rank 0"):
    self.evaluate(
        array_ops.fake_quant_with_min_max_vars(
            inputs=inputs, min=[[1.0], [2.0], [4.0]], max=1.0))
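# For contrast, within the same test a valid call would pass rank-0 scalars
# for both min and max (the range values here are illustrative):
valid = self.evaluate(
    array_ops.fake_quant_with_min_max_vars(inputs=inputs, min=0.0, max=6.0))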
def _TestOp(self, input_min, input_max, num_bits, narrow_range,
            expected_nudged_input_min, expected_nudged_input_max,
            expected_step):
  inputs = np.array(
      [
          expected_nudged_input_min - expected_step,
          expected_nudged_input_min - 0.01, expected_nudged_input_min,
          expected_nudged_input_min + 0.01,
          expected_nudged_input_min + expected_step - 0.01,
          expected_nudged_input_min + expected_step,
          expected_nudged_input_min + expected_step + 0.01,
          expected_nudged_input_max - 0.01, expected_nudged_input_max,
          expected_nudged_input_max + 0.01,
          expected_nudged_input_max + expected_step
      ],
      dtype=np.float32)
  expected = np.array(
      [
          expected_nudged_input_min, expected_nudged_input_min,
          expected_nudged_input_min, expected_nudged_input_min,
          expected_nudged_input_min + expected_step,
          expected_nudged_input_min + expected_step,
          expected_nudged_input_min + expected_step,
          expected_nudged_input_max, expected_nudged_input_max,
          expected_nudged_input_max, expected_nudged_input_max
      ],
      dtype=np.float32)
  with self.test_session() as session:
    with self.test_scope():
      input_placeholder = array_ops.placeholder(
          dtypes.float32, inputs.shape, name="inputs")
      min_placeholder = array_ops.placeholder(dtypes.float32, (), name="min")
      max_placeholder = array_ops.placeholder(dtypes.float32, (), name="max")
      outputs = array_ops.fake_quant_with_min_max_vars(
          input_placeholder,
          min_placeholder,
          max_placeholder,
          num_bits=num_bits,
          narrow_range=narrow_range)
    result = session.run(
        outputs, {
            input_placeholder: inputs,
            min_placeholder: input_min,
            max_placeholder: input_max
        })
    self.assertAllCloseAccordingToType(
        result, expected, rtol=1e-3, atol=1e-5, bfloat16_rtol=0.03)
def _TestOp(self, input_min, input_max, num_bits, narrow_range,
            expected_nudged_input_min, expected_nudged_input_max,
            expected_step):
  inputs = np.array(
      [
          expected_nudged_input_min - expected_step,
          expected_nudged_input_min - 0.01, expected_nudged_input_min,
          expected_nudged_input_min + 0.01,
          expected_nudged_input_min + expected_step - 0.01,
          expected_nudged_input_min + expected_step,
          expected_nudged_input_min + expected_step + 0.01,
          expected_nudged_input_max - 0.01, expected_nudged_input_max,
          expected_nudged_input_max + 0.01,
          expected_nudged_input_max + expected_step
      ],
      dtype=np.float32)
  expected = np.array(
      [
          expected_nudged_input_min, expected_nudged_input_min,
          expected_nudged_input_min, expected_nudged_input_min,
          expected_nudged_input_min + expected_step,
          expected_nudged_input_min + expected_step,
          expected_nudged_input_min + expected_step,
          expected_nudged_input_max, expected_nudged_input_max,
          expected_nudged_input_max, expected_nudged_input_max
      ],
      dtype=np.float32)
  with self.cached_session() as session:
    with self.test_scope():
      input_placeholder = array_ops.placeholder(
          dtypes.float32, inputs.shape, name="inputs")
      min_placeholder = array_ops.placeholder(dtypes.float32, (), name="min")
      max_placeholder = array_ops.placeholder(dtypes.float32, (), name="max")
      outputs = array_ops.fake_quant_with_min_max_vars(
          input_placeholder,
          min_placeholder,
          max_placeholder,
          num_bits=num_bits,
          narrow_range=narrow_range)
    result = session.run(
        outputs, {
            input_placeholder: inputs,
            min_placeholder: input_min,
            max_placeholder: input_max
        })
    self.assertAllCloseAccordingToType(
        result, expected, rtol=1e-3, atol=1e-5, bfloat16_rtol=0.03)
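# A plain-Python sketch of the nudging computation that the expected values
# fed to _TestOp encode. The helper name and structure are ours, based on the
# documented FakeQuantWithMinMaxVars semantics: the [input_min, input_max]
# range is snapped to a uniform grid so that 0.0 is exactly representable.
def _nudged_range(input_min, input_max, num_bits, narrow_range):
  quant_min = 1 if narrow_range else 0
  quant_max = (1 << num_bits) - 1
  step = (input_max - input_min) / (quant_max - quant_min)
  zero_point_from_min = quant_min - input_min / step
  # Round the zero point to the nearest grid index, then clamp it into the
  # quantized domain.
  nudged_zero_point = min(quant_max,
                          max(quant_min, int(round(zero_point_from_min))))
  nudged_min = (quant_min - nudged_zero_point) * step
  nudged_max = (quant_max - nudged_zero_point) * step
  return nudged_min, nudged_max, step

# For example, _nudged_range(-0.1, 7.9, num_bits=8, narrow_range=False)
# yields step = 8 / 255 and nudges the range to roughly (-0.0941, 7.9059),
# so -0.1 and 7.9 themselves are not points on the quantized grid.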