def run(
    N, IC, OC, IH, IW, KH, KW, PH, PW, SH, SW,
    has_bias=True,
):
    # batch_conv_bias uses a per-sample weight, hence the leading N in the
    # weight shape (N, OC, IC, KH, KW).
    inp_v = np.random.normal(size=(N, IC, IH, IW))
    w_v = np.random.normal(size=(N, OC, IC, KH, KW))
    b_v = np.random.normal(size=(1, OC, 1, 1))
    inp_scale = dtype.get_scale(inp_dtype)
    w_scale = dtype.get_scale(w_dtype)
    b_scale = dtype.get_scale(b_dtype)

    # Quantize the float values into qint8/qint32 storage.
    inpv = dtype.convert_to_qint8(inp_v * inp_scale, inp_dtype)
    wv = dtype.convert_to_qint8(w_v * w_scale, w_dtype)
    bv = dtype.convert_to_qint32(b_v * b_scale, b_dtype)

    inp_int8 = tensor(inpv, dtype=inp_dtype)
    w_int8 = Parameter(wv, dtype=w_dtype)
    b_int32 = Parameter(bv, dtype=b_dtype)

    # Float copies of the same (already quantized) values for the reference path.
    inp_fp32 = inp_int8.astype("float32")
    w_fp32 = w_int8.astype("float32")
    b_fp32 = b_int32.astype("float32")

    def run_batch_conv_bias(inp, w, b):
        b = b if has_bias else Parameter(np.zeros_like(b.numpy()))
        result = F.quantized.batch_conv_bias_activation(
            inp, w, b, stride=(SH, SW), padding=(PH, PW), dtype=out_dtype,
        )
        return result.astype("float32")

    expected = F.conv2d(inp_fp32, w_fp32[0], b_fp32 if has_bias else None)[0]
    expected = expected.astype(out_dtype).astype("float32")
    expected = F.flatten(expected)

    result = run_batch_conv_bias(inp_int8, w_int8, b_int32)
    result = F.flatten(result)

    # The quantized and float paths may differ by up to one quantization step.
    np.testing.assert_allclose(result.numpy(), expected.numpy(), atol=outp_scale)
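# --- Usage sketch (assumption): the helper above closes over names supplied
# by the enclosing test (np, F, tensor, Parameter, dtype, the q-dtypes, and
# outp_scale). The import paths, scale values, and shapes below are
# illustrative assumptions, not the suite's exact ones.
import numpy as np
import megengine.functional as F
from megengine import Parameter, tensor
from megengine.core.tensor import dtype

inp_scale, w_scale, outp_scale = 0.05, 0.04, 0.1  # illustrative scales
inp_dtype = dtype.qint8(inp_scale)
w_dtype = dtype.qint8(w_scale)
b_dtype = dtype.qint32(inp_scale * w_scale)
out_dtype = dtype.qint8(outp_scale)

run(1, 4, 4, 5, 5, 3, 3, 0, 0, 1, 1, has_bias=True)  # hypothetical shapes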
def run(
    N, IC, OC, IH, IW, KH, KW, PH, PW, SH, SW,
    has_bias=True, nonlinear_mode="identity",
):
    inp_v = np.random.normal(size=(N, IC, IH, IW))
    w_v = np.random.normal(size=(OC, IC, KH, KW))
    b_v = np.random.normal(size=(1, OC, 1, 1))
    inp_scale = dtype.get_scale(inp_dtype)
    w_scale = dtype.get_scale(w_dtype)
    b_scale = dtype.get_scale(b_dtype)

    # Quantize the float values into qint8/qint32 storage.
    inpv = dtype.convert_to_qint8(inp_v * inp_scale, inp_dtype)
    wv = dtype.convert_to_qint8(w_v * w_scale, w_dtype)
    bv = dtype.convert_to_qint32(b_v * b_scale, b_dtype)

    inp_int8 = tensor(inpv, dtype=inp_dtype)
    w_int8 = Parameter(wv, dtype=w_dtype)
    b_int32 = Parameter(bv, dtype=b_dtype)

    # Float copies of the same (already quantized) values for the reference path.
    inp_fp32 = inp_int8.astype("float32")
    w_fp32 = w_int8.astype("float32")
    b_fp32 = b_int32.astype("float32")

    def convert_to_nchw4(var):
        # (N, C, H, W) -> (N, C//4, H, W, 4)
        var = F.reshape(
            var, (var.shape[0], var.shape[1] // 4, 4, var.shape[2], var.shape[3])
        )
        var = F.transpose(var, (0, 1, 3, 4, 2))
        return var

    def run_conv2d(inp, w, b):
        # Float reference path.
        O = F.conv2d(
            inp, w, b if has_bias else None, stride=(SH, SW), padding=(PH, PW),
        )
        if nonlinear_mode == "relu":
            return F.relu(O)
        else:
            return O

    def run_conv_bias(inp, w, b, format="NCHW"):
        b = b if has_bias else Parameter(np.zeros_like(b.numpy()))
        if format == "NCHW4":
            inp = convert_to_nchw4(inp)
            w = convert_to_nchw4(w)
            b = convert_to_nchw4(b)
        return F.quantized.conv_bias_activation(
            inp, w, b,
            stride=(SH, SW), padding=(PH, PW),
            dtype=out_dtype, nonlinear_mode=nonlinear_mode,
        )

    # The CUDA int8 kernels consume the NCHW4 layout; fall back to NCHW on CPU.
    format = "NCHW4" if is_cuda_available() else "NCHW"

    expected = run_conv2d(inp_fp32, w_fp32, b_fp32)
    expected = expected.astype(out_dtype).astype("float32")
    result = run_conv_bias(inp_int8, w_int8, b_int32, format=format).astype("float32")
    if format == "NCHW4":
        # (N, C//4, H, W, 4) -> (N, C//4, 4, H, W) so both results flatten alike.
        result = F.transpose(result, (0, 1, 4, 2, 3))
    expected = F.flatten(expected)
    result = F.flatten(result)
    # The quantized and float paths may differ by up to one quantization step.
    np.testing.assert_allclose(result.numpy(), expected.numpy(), atol=outp_scale)
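# --- Usage sketch (assumption): same dtype setup as the sketch above. Channel
# counts must be multiples of 4 so the NCHW4 reshape divides evenly; the call
# below is a hypothetical invocation exercising the fused ReLU.
from megengine import is_cuda_available  # assumed top-level export

run(1, 4, 8, 24, 33, 3, 3, 1, 1, 1, 1, has_bias=True, nonlinear_mode="relu")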
def run(
    N, IC, OC, IH, IW, KH, KW, PH, PW, SH, SW,
    has_bias=True, nonlinear_mode="IDENTITY",
):
    inp_v = np.random.normal(size=(N, IC, IH, IW))
    # Weight shape is (OC, IC, KH, KW); using KW twice would ignore KH.
    w_v = np.random.normal(size=(OC, IC, KH, KW))
    b_v = np.random.normal(size=(1, OC, 1, 1))
    inp_scale = mgb.dtype.get_scale(inp_dtype)
    w_scale = mgb.dtype.get_scale(w_dtype)
    b_scale = mgb.dtype.get_scale(b_dtype)

    inpv = mgb.dtype.convert_to_qint8(inp_v * inp_scale, inp_dtype)
    wv = mgb.dtype.convert_to_qint8(w_v * w_scale, w_dtype)
    bv = mgb.dtype.convert_to_qint32(b_v * b_scale, b_dtype)

    inp_int8 = tensor(inpv, dtype=inp_dtype)
    w_int8 = Parameter(wv, dtype=w_dtype)
    b_int32 = Parameter(bv, dtype=b_dtype)

    inp_fp32 = inp_int8.astype("float32")
    w_fp32 = w_int8.astype("float32")
    b_fp32 = b_int32.astype("float32")

    jit.trace.enabled = True
    b_symbolic = True

    def convert_to_nchw4(var):
        # (N, C, H, W) -> (N, C//4, H, W, 4), via the legacy shapeof/dimshuffle API.
        return var.reshape(
            var.shapeof(0), var.shapeof(1) // 4, 4, var.shapeof(2), var.shapeof(3)
        ).dimshuffle(0, 1, 3, 4, 2)

    @jit.trace(symbolic=b_symbolic)
    def run_conv2d(inp, w, b):
        # Float reference path.
        O = F.conv2d(
            inp, w, b if has_bias else None, stride=(SH, SW), padding=(PH, PW),
        )
        if nonlinear_mode == "RELU":
            return F.relu(O)
        else:
            return O

    @jit.trace(symbolic=b_symbolic)
    def run_conv_bias(inp, w, b, format="NCHW"):
        b = b if has_bias else np.zeros_like(b)
        if format == "NCHW4":
            inp = convert_to_nchw4(inp)
            w = convert_to_nchw4(w)
            # The bias must also be channel-blocked to (1, OC//4, 1, 1, 4),
            # matching the newer variant above; flattening it would not.
            b = convert_to_nchw4(b)
        return F.conv_bias_activation(
            inp, w, b,
            stride=(SH, SW), padding=(PH, PW),
            dtype=out_dtype, nonlinear_mode=nonlinear_mode,
        )

    format = "NCHW4" if is_cuda_available() else "NCHW"

    expected = run_conv2d(inp_fp32, w_fp32, b_fp32)
    expected = expected.astype(out_dtype).astype("float32")
    result = run_conv_bias(inp_int8, w_int8, b_int32, format=format).astype("float32")
    if format == "NCHW4":
        result = result.dimshuffle(0, 1, 4, 2, 3)
    expected = F.flatten(expected)
    result = F.flatten(result)
    assertTensorClose(result.numpy(), expected.numpy())
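# --- Usage sketch (assumption): this is the legacy (pre-1.0) variant of the
# same test, built on the megengine._internal (mgb) dtype helpers, jit.trace
# graph compilation, and dimshuffle/shapeof instead of transpose/shape. The
# import paths and scale values below are assumed, not verified against that
# release.
import numpy as np
import megengine._internal as mgb
import megengine.functional as F
from megengine import Parameter, is_cuda_available, jit, tensor
from megengine.test import assertTensorClose

inp_dtype = mgb.dtype.qint8(0.05)  # illustrative scales, as above
w_dtype = mgb.dtype.qint8(0.04)
b_dtype = mgb.dtype.qint32(0.05 * 0.04)
out_dtype = mgb.dtype.qint8(0.1)

run(1, 4, 4, 5, 5, 3, 3, 0, 0, 1, 1, has_bias=True, nonlinear_mode="IDENTITY")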