def test_dnnlowp_depthwise_3x3x3_conv(
    self,
    stride_0,
    stride_1,
    stride_2,
    size,
    group,
    batch_size,
    prepack_weight,
    fuse_relu,
    share_col_buffer,
    preserve_activation_sparsity,
    preserve_weight_sparsity,
    quantize_groupwise,
    gc,
    dc,
):
    pad = 1
    kernel = 3
    dilation = 1
    input_channels_per_group = 1
    output_channels_per_group = 1
    order = "NHWC"

    X, W, b = generate_convnd_inputs(
        (stride_0, stride_1, stride_2),
        (pad,) * 3,
        (kernel,) * 3,
        (dilation,) * 3,
        (size,) * 3,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
        groupwise_quantization=quantize_groupwise,
        preserve_activation_sparsity=preserve_activation_sparsity,
        preserve_weight_sparsity=preserve_weight_sparsity,
    )

    Output = collections.namedtuple("Output", ["Y", "op_type", "engine", "order"])
    outputs = []

    op = "ConvRelu" if fuse_relu else "Conv"
    op_engine_list = [(op, ""), (op, "DNNLOWP"), ("Int8" + op, "DNNLOWP")]

    for op_type, engine in op_engine_list:
        init_net = core.Net("test_init_net")
        net = core.Net("test_net")

        do_quantize = "DNNLOWP" in engine
        do_dequantize = "DNNLOWP" in engine
        do_prepack_weight = engine == "DNNLOWP" and prepack_weight

        if do_quantize:
            quantize = core.CreateOperator(
                "Quantize",
                ["X"],
                ["X_q"],
                preserve_activation_sparsity=preserve_activation_sparsity,
                engine=engine,
                device_option=gc,
            )
            net.Proto().op.extend([quantize])

        if do_prepack_weight:
            X_min = 0 if X.size == 0 else X.min()
            X_max = 0 if X.size == 0 else X.max()
            x_q_param = dnnlowp_utils.choose_quantization_params(
                X_min, X_max, preserve_activation_sparsity
            )
            inputs = ["W"]
            if do_dequantize:
                inputs += ["b"]
            pack = core.CreateOperator(
                "Int8ConvPackWeight",
                inputs,
                ["W_packed"],
                strides=[stride_0, stride_1, stride_2],
                kernels=[kernel] * 3,
                dilations=[dilation] * 3,
                pads=[pad] * (3 * 2),
                preserve_weight_sparsity=preserve_weight_sparsity,
                engine=engine,
                group=group,
                quantize_groupwise=quantize_groupwise,
                in_scale=x_q_param.scale,
            )
            init_net.Proto().op.extend([pack])

        conv = core.CreateOperator(
            op_type,
            [
                "X_q" if do_quantize else "X",
                # Use the prepacked weight when Int8ConvPackWeight produced one.
                "W_packed" if do_prepack_weight else "W",
                "b",
            ],
            ["Y_q" if do_dequantize else "Y"],
            strides=[stride_0, stride_1, stride_2],
            kernels=[kernel] * 3,
            dilations=[dilation] * 3,
            pads=[pad] * (3 * 2),
            order=order,
            shared_buffer=(1 if share_col_buffer else 0),
            preserve_activation_sparsity=preserve_activation_sparsity,
            preserve_weight_sparsity=preserve_weight_sparsity,
            engine=engine,
            group=group,
            quantize_groupwise=quantize_groupwise,
            device_option=gc,
        )
        if do_dequantize or do_prepack_weight:
            # The quantized op can't derive output scale/zero point here, so
            # take them from the fp32 reference output collected first.
            dnnlowp_utils.add_quantization_param_args(
                conv, outputs[0][0], preserve_activation_sparsity
            )
        net.Proto().op.extend([conv])

        if do_dequantize:
            dequantize = core.CreateOperator(
                "Dequantize", ["Y_q"], ["Y"], engine=engine, device_option=gc
            )
            net.Proto().op.extend([dequantize])

        run_conv_or_fc(
            self, init_net, net, X, W, b, op_type, engine, order, gc, outputs
        )

    check_quantized_results_close(outputs, symmetric=preserve_activation_sparsity)
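# Illustrative only: a minimal sketch of the Quantize -> Int8Conv -> Dequantize
# pipeline that the test above exercises, stripped of the hypothesis harness.
# The blob names, qparams, and shapes below are assumptions for demonstration,
# not values used by the test; running the net requires a Caffe2 build with
# the DNNLOWP engine.
def _example_int8_conv_net_sketch():
    from caffe2.python import core

    net = core.Net("int8_conv_example")
    net.Proto().op.extend(
        [
            # Quantize fp32 activations to 8-bit.
            core.CreateOperator("Quantize", ["X"], ["X_q"], engine="DNNLOWP"),
            # Quantized 3D conv; with quantized inputs the output qparams
            # must be supplied statically via Y_scale/Y_zero_point.
            core.CreateOperator(
                "Int8Conv",
                ["X_q", "W", "b"],
                ["Y_q"],
                kernels=[3] * 3,
                strides=[1] * 3,
                pads=[1] * 6,
                order="NHWC",
                engine="DNNLOWP",
                Y_scale=0.1,  # assumed output scale
                Y_zero_point=0,  # assumed output zero point
            ),
            # Back to fp32 for comparison against the reference op.
            core.CreateOperator("Dequantize", ["Y_q"], ["Y"], engine="DNNLOWP"),
        ]
    )
    return net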
def _test_dnnlowp_nd_int(
    self,
    stride,
    pad,
    kernels,
    dilation,
    size,
    group,
    input_channels_per_group,
    output_channels_per_group,
    batch_size,
    order,
    prepack_weight,
    gc,
    dc,
):
    assume(group == 1 or dilation == 1)
    assume((not prepack_weight) or order == "NHWC")

    ndim = len(kernels)

    X, W, b = generate_convnd_inputs(
        (stride,) * ndim,
        (pad,) * ndim,
        kernels,
        (dilation,) * ndim,
        (size,) * ndim,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
    )

    Output = collections.namedtuple("Output", ["Y", "op_type", "engine", "order"])
    outputs = []

    op_engine_list = [("Conv", ""), ("Conv", "DNNLOWP_16"), ("Int8Conv", "DNNLOWP")]

    for op_type, engine in op_engine_list:
        init_net = core.Net("test_init_net")
        net = core.Net("test_net")

        do_quantize = "DNNLOWP" in engine
        do_dequantize = "DNNLOWP" in engine
        # If the output scale/zero point aren't set, DNNLOWP computes them
        # from the reference fp32 op, which isn't possible once the weights
        # are quantized. Make sure at least one output has been collected
        # so the output scale/zero point can be derived from it.
        do_quantize_weight = engine == "DNNLOWP" and len(outputs) > 0
        do_prepack_weight = engine == "DNNLOWP" and prepack_weight

        if do_quantize:
            quantize = core.CreateOperator(
                "Quantize", ["X"], ["X_q"], engine=engine, device_option=gc
            )
            net.Proto().op.extend([quantize])

        X_min = 0 if X.size == 0 else X.min()
        X_max = 0 if X.size == 0 else X.max()
        x_q_param = dnnlowp_utils.choose_quantization_params(X_min, X_max)

        if do_quantize_weight:
            (
                int8_given_tensor_fill,
                w_q_param,
            ) = dnnlowp_utils.create_int8_given_tensor_fill(W, "W_q")
            init_net.Proto().op.extend([int8_given_tensor_fill])

            # Bias
            int8_bias_tensor_fill = dnnlowp_utils.create_int8_bias_tensor_fill(
                b, "b_q", x_q_param, w_q_param
            )
            init_net.Proto().op.extend([int8_bias_tensor_fill])

        if do_prepack_weight:
            inputs = ["W_q" if do_quantize_weight else "W"]
            if do_dequantize:
                inputs += ["b_q" if do_quantize_weight else "b"]
            pack = core.CreateOperator(
                "Int8ConvPackWeight",
                inputs,
                ["W_packed"],
                strides=[stride] * ndim,
                kernels=kernels,
                dilations=[dilation] * ndim,
                pads=[pad] * (ndim * 2),
                engine=engine,
                group=group,
                in_scale=x_q_param.scale,
            )
            init_net.Proto().op.extend([pack])

        conv = core.CreateOperator(
            op_type,
            [
                "X_q" if do_quantize else "X",
                "W_packed"
                if do_prepack_weight
                else ("W_q" if do_quantize_weight else "W"),
                "b_q" if do_quantize_weight else "b",
            ],
            ["Y_q" if do_dequantize else "Y"],
            strides=[stride] * ndim,
            kernels=kernels,
            dilations=[dilation] * ndim,
            pads=[pad] * (ndim * 2),
            order=order,
            dequantize_output=not do_dequantize,
            engine=engine,
            group=group,
            device_option=gc,
        )
        if do_quantize_weight or do_prepack_weight:
            # When the quantized weight is provided, we can't rescale the
            # output dynamically by looking at the range of each batch's
            # output, so provide the output range observed from the fp32
            # reference implementation instead.
            dnnlowp_utils.add_quantization_param_args(conv, outputs[0][0])
        net.Proto().op.extend([conv])

        if do_dequantize:
            dequantize = core.CreateOperator(
                "Dequantize", ["Y_q"], ["Y"], engine=engine, device_option=gc
            )
            net.Proto().op.extend([dequantize])

        run_conv_or_fc(
            self, init_net, net, X, W, b, op_type, engine, order, gc, outputs
        )

    check_quantized_results_close(outputs)
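# Illustrative only: a hypothetical hypothesis-driven wrapper showing how a
# concrete 3D conv test could delegate to _test_dnnlowp_nd_int. It would live
# in the same test class; the parameter ranges and the use of
# caffe2.python.hypothesis_test_util (imported as hu) for device options are
# assumptions, not necessarily the actual test file's settings.
@given(
    stride=st.integers(1, 2),
    pad=st.integers(0, 2),
    size=st.integers(5, 8),
    group=st.integers(1, 2),
    input_channels_per_group=st.sampled_from([2, 4]),
    output_channels_per_group=st.sampled_from([2, 4]),
    batch_size=st.integers(0, 2),
    order=st.sampled_from(["NCHW", "NHWC"]),
    prepack_weight=st.booleans(),
    **hu.gcs_cpu_only
)
def test_dnnlowp_conv3d_int_example(
    self,
    stride,
    pad,
    size,
    group,
    input_channels_per_group,
    output_channels_per_group,
    batch_size,
    order,
    prepack_weight,
    gc,
    dc,
):
    self._test_dnnlowp_nd_int(
        stride,
        pad,
        (3, 3, 3),  # 3x3x3 kernels, so ndim == 3
        1,  # dilation
        size,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
        prepack_weight,
        gc,
        dc,
    )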