def SimulateFusedConv2dBiasActivationInt8(conv_input_scale, conv_input, kernel,
                                          padding, strides, side_input_scale,
                                          side_input, biases):
  """Builds a float-op reference for the int8 fused conv + bias + Relu op.

  Arguments and return value follow the same formats, meanings and
  restrictions as the real fused op.

  Args:
    conv_input_scale: A scalar 'float'.
    conv_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
    kernel: A `Tensor` of type `qint8` in OIHW_VECT_I layout.
    padding: A `string` from: `"SAME", "VALID"`.
    strides: A list of `ints`.
    side_input_scale: A scalar 'float'.
    side_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
    biases: A `Tensor` of type `float32` in NCHW layout.

  Returns:
    A `Tensor` of type `qint8` in NCHW_VECT_C layout.
  """

  def _dequantize(quantized):
    # Every quantized tensor in this helper uses the fixed [-128, 127] range.
    return gen_array_ops.dequantize(quantized, -128, 127)

  # Convolution on dequantized operands, scaled by the conv input scale.
  float_input = NchwVectCToNchw(_dequantize(conv_input))
  float_kernel = OihwVectIToHwio(_dequantize(kernel))
  conv_out = nn_ops.conv2d(
      float_input,
      float_kernel,
      strides=strides,
      padding=padding,
      data_format="NCHW")
  scaled_conv = conv_out * conv_input_scale

  # Add the scaled side input, then the bias.
  float_side_input = NchwVectCToNchw(_dequantize(side_input))
  summed = scaled_conv + side_input_scale * float_side_input
  biased = nn_ops.bias_add(summed, biases, data_format="NCHW")

  # Relu, convert back to the vectorized layout and requantize to qint8.
  # quantize_v2 also returns the output min/max, which are not needed here.
  quantized, _, _ = gen_array_ops.quantize_v2(
      NchwToNchwVectC(nn_ops.relu(biased)), -128, 127, dtypes.qint8)
  return quantized
def compareToTranspose(self, batch_size, out_height, out_width, in_channels,
                       block_size, data_format, use_gpu):
  """Checks `space_to_depth` against the transpose-based reference.

  Builds an input of spatial size (out_height * block_size,
  out_width * block_size), runs both `array_ops.space_to_depth` and
  `self.spaceToDepthUsingTranspose` on it, and asserts the results are equal.

  Args:
    batch_size: Batch dimension of the generated input.
    out_height: Output height; the input height is `out_height * block_size`.
    out_width: Output width; the input width is `out_width * block_size`.
    in_channels: Channel dimension of the generated input.
    block_size: Block size passed to both implementations.
    data_format: `"NCHW_VECT_C"` selects the quantized path and `"NCHW"` an
      NCHW-shaped float input; any other value uses an NHWC-shaped float
      input.
    use_gpu: Forwarded to `self.cached_session`.
  """
  height = out_height * block_size
  width = out_width * block_size
  nhwc_shape = [batch_size, height, width, in_channels]
  num_elements = np.prod(nhwc_shape)

  if data_format != "NCHW_VECT_C":
    # Float path: ascending whole numbers laid out according to data_format.
    values = [float(i) for i in range(num_elements)]
    if data_format == "NCHW":
      input_shape = [batch_size, in_channels, height, width]
    else:
      input_shape = nhwc_shape
    inputs = constant_op.constant(
        values, shape=input_shape, dtype=dtypes.float32)
    expected = self.spaceToDepthUsingTranspose(inputs, block_size, data_format)
    actual = array_ops.space_to_depth(
        inputs, block_size, data_format=data_format)
  else:
    # Quantized path: values cycle through -127..127 so they survive the
    # qint8 round trip.
    values = [((i + 128) % 255) - 127 for i in range(num_elements)]
    nhwc_input = constant_op.constant(
        values, shape=nhwc_shape, dtype=dtypes.float32)
    # The reference is computed in NHWC on the float values.
    expected = self.spaceToDepthUsingTranspose(nhwc_input, block_size, "NHWC")
    vect_input = test_util.NHWCToNCHW_VECT_C(nhwc_input)
    quantized, _, _ = gen_array_ops.quantize_v2(
        vect_input, -128.0, 127.0, dtypes.qint8)
    rearranged = array_ops.space_to_depth(
        quantized, block_size, data_format="NCHW_VECT_C")
    dequantized = gen_array_ops.dequantize(rearranged, -128, 127)
    # Convert back to NHWC so the result is comparable with the reference.
    actual = test_util.NCHW_VECT_CToNHWC(dequantized)

  with self.cached_session(use_gpu=use_gpu) as sess:
    actual_vals, expected_vals = sess.run([actual, expected])
    self.assertTrue(np.array_equal(actual_vals, expected_vals))
def compareToTranspose(self, batch_size, in_height, in_width, out_channels,
                       block_size, data_format, use_gpu):
  """Checks `depth_to_space` against the transpose-based reference.

  Builds an input with `out_channels * block_size * block_size` channels, runs
  both `array_ops.depth_to_space` and `self.depthToSpaceUsingTranspose` on it,
  and asserts the results are equal.

  Args:
    batch_size: Batch dimension of the generated input.
    in_height: Height of the generated input.
    in_width: Width of the generated input.
    out_channels: Output channel count; the input has
      `out_channels * block_size * block_size` channels.
    block_size: Block size passed to both implementations.
    data_format: `"NCHW_VECT_C"` selects the quantized path and `"NCHW"` an
      NCHW-shaped float input; any other value uses an NHWC-shaped float
      input.
    use_gpu: Forwarded to `self.cached_session`.
  """
  in_channels = out_channels * block_size * block_size
  nhwc_input_shape = [batch_size, in_height, in_width, in_channels]
  nchw_input_shape = [batch_size, in_channels, in_height, in_width]
  total_size = np.prod(nhwc_input_shape)

  if data_format == "NCHW_VECT_C":
    # Initialize the input tensor with qint8 values that circle -127..127.
    x = [((f + 128) % 255) - 127 for f in range(total_size)]
    t = constant_op.constant(x, shape=nhwc_input_shape, dtype=dtypes.float32)
    # The reference is computed in NHWC on the float values.
    expected = self.depthToSpaceUsingTranspose(t, block_size, "NHWC")
    t = test_util.NHWCToNCHW_VECT_C(t)
    t, _, _ = gen_array_ops.quantize_v2(t, -128.0, 127.0, dtypes.qint8)
    t = array_ops.depth_to_space(t, block_size, data_format="NCHW_VECT_C")
    t = gen_array_ops.dequantize(t, -128, 127)
    # Convert back to NHWC so the result is comparable with the reference.
    actual = test_util.NCHW_VECT_CToNHWC(t)
  else:
    # Initialize the input tensor with ascending whole numbers as floats.
    x = [f * 1.0 for f in range(total_size)]
    shape = nchw_input_shape if data_format == "NCHW" else nhwc_input_shape
    t = constant_op.constant(x, shape=shape, dtype=dtypes.float32)
    expected = self.depthToSpaceUsingTranspose(t, block_size, data_format)
    actual = array_ops.depth_to_space(t, block_size, data_format=data_format)

  # cached_session replaces the deprecated test_session and matches the
  # space_to_depth variant of this helper.
  with self.cached_session(use_gpu=use_gpu) as sess:
    actual_vals, expected_vals = sess.run([actual, expected])
    self.assertTrue(np.array_equal(actual_vals, expected_vals))
def _SimulateFusedConv2dBiasActivationInt8(conv_input_scale, conv_input, kernel,
                                           padding, strides, side_input_scale,
                                           side_input, biases, apply_relu):
  """Builds a float-op reference for the int8 fused conv + bias + activation.

  Arguments and return value follow the same formats, meanings and
  restrictions as the real fused op.

  Args:
    conv_input_scale: A scalar 'float'.
    conv_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
    kernel: A `Tensor` of type `qint8` in OIHW_VECT_I layout.
    padding: A `string` from: `"SAME", "VALID"`.
    strides: A list of `ints`.
    side_input_scale: A scalar 'float'.
    side_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
    biases: A `Tensor` of type `float32` in NCHW layout.
    apply_relu: A boolean to specify whether to apply "Relu" activation
      function that clips outputs to the range [0, 127], or "None" activation
      that clips to the range [-128, 127].

  Returns:
    A `Tensor` of type `qint8` in NCHW_VECT_C layout.
  """
  # Step 1: float convolution on the dequantized input and kernel.
  input_nchw = _NchwVectCToNchw(gen_array_ops.dequantize(conv_input, -128, 127))
  filter_hwio = _OihwVectIToHwio(gen_array_ops.dequantize(kernel, -128, 127))
  conv_out = nn_ops.conv2d(
      input_nchw,
      filter_hwio,
      strides=strides,
      padding=padding,
      data_format="NCHW")
  scaled_conv = conv_out * conv_input_scale

  # Step 2: add the scaled side input and the bias.
  side_nchw = _NchwVectCToNchw(gen_array_ops.dequantize(side_input, -128, 127))
  combined = scaled_conv + side_input_scale * side_nchw
  biased = nn_ops.bias_add(combined, biases, data_format="NCHW")

  # Step 3: optional Relu, then requantize in the vectorized layout.
  activated = nn_ops.relu(biased) if apply_relu else biased
  quantized, _, _ = gen_array_ops.quantize_v2(
      _NchwToNchwVectC(activated), -128, 127, dtypes.qint8)
  return quantized
def _SimulateFusedConv2dBiasActivationInt8(conv_input_scale, conv_input, kernel,
                                           padding, strides, side_input_scale,
                                           side_input, biases, apply_relu):
  """Emulates the int8 fused 2-D convolution op with individual float ops.

  Inputs and the returned tensor use the same formats, meanings and
  restrictions as the actual fused op.

  Args:
    conv_input_scale: A scalar 'float'.
    conv_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
    kernel: A `Tensor` of type `qint8` in OIHW_VECT_I layout.
    padding: A `string` from: `"SAME", "VALID"`.
    strides: A list of `ints`.
    side_input_scale: A scalar 'float'.
    side_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
    biases: A `Tensor` of type `float32` in NCHW layout.
    apply_relu: A boolean to specify whether to apply "Relu" activation
      function that clips outputs to the range [0, 127], or "None" activation
      that clips to the range [-128, 127].

  Returns:
    A `Tensor` of type `qint8` in NCHW_VECT_C layout.
  """

  def _as_float(quantized):
    # All quantized operands share the fixed [-128, 127] range.
    return gen_array_ops.dequantize(quantized, -128, 127)

  conv_operand = _NchwVectCToNchw(_as_float(conv_input))
  kernel_operand = _OihwVectIToHwio(_as_float(kernel))
  conv_term = nn_ops.conv2d(
      conv_operand,
      kernel_operand,
      strides=strides,
      padding=padding,
      data_format="NCHW") * conv_input_scale

  side_term = side_input_scale * _NchwVectCToNchw(_as_float(side_input))
  pre_activation = nn_ops.bias_add(
      conv_term + side_term, biases, data_format="NCHW")

  # Without Relu the biased sum goes to quantization unchanged.
  if not apply_relu:
    post_activation = pre_activation
  else:
    post_activation = nn_ops.relu(pre_activation)

  # Only the quantized tensor is returned; the output min/max are dropped.
  requantized, _, _ = gen_array_ops.quantize_v2(
      _NchwToNchwVectC(post_activation), -128, 127, dtypes.qint8)
  return requantized
# NOTE(review): request_min_out, request_max_out, q_out, min_out, max_out, a
# and b are not defined in this fragment; they are presumably created earlier
# in the script, and `.eval()` presumably relies on a default session being
# active -- confirm against the surrounding code.

# Report the output range requested for requantization.
print("------- requantization_range ------")
print("min: ", request_min_out.eval(), " max: ", request_max_out.eval())

# Requantize the quantized result into quint8 using the requested range.
[rq_out, rq_min_out, rq_max_out] = gen_math_ops.requantize(
    q_out, min_out, max_out, request_min_out, request_max_out, tf.quint8,
    name="rQ")
print("------- requantize ------")
# The quint8 tensor is bitcast to uint8 so the mean is taken over the raw
# quantized values.
print("min: ", rq_min_out.eval(), " max: ", rq_max_out.eval(), "mean: ",
      tf.reduce_mean(tf.to_float(tf.bitcast(rq_out, tf.uint8))).eval())

# Dequantize back to float with the range reported by requantize.
dq_out = gen_array_ops.dequantize(rq_out, rq_min_out, rq_max_out, "MIN_FIRST",
                                  name="deQ")

# Compare against the float matmul of the same operands.
reference_out = tf.matmul(a, b)
diff = tf.subtract(reference_out, dq_out)
diff = tf.reduce_mean(tf.abs(diff))  # average delta per element

print("------- dequantize ------")
# dq_out is already float, so its mean is printed directly. The previous code
# bitcast the scalar float mean to uint8 and averaged its raw bytes, which is
# not the mean of the dequantized values.
print("mean: ", tf.reduce_mean(dq_out).eval())

# Evaluate the mean absolute difference once and reuse it for both figures.
mean_abs_diff = diff.eval()
print("diff: ", mean_abs_diff, ", percent diff: ",
      mean_abs_diff / tf.reduce_mean(reference_out).eval() * 100, "%")