def test_dtype_qint4():
    """Check that ``qint4`` yields a numpy dtype carrying quantization metadata.

    The dtype must expose an ``"mgb_dtype"`` metadata entry whose ``"scale"``
    matches the requested scale, be recognised by ``is_quantize``, and report
    the same scale through ``get_scale``.
    """
    scale = 0.01
    qdtype = qint4(scale)

    # Structural checks: a real numpy dtype with the metadata entry present.
    assert isinstance(qdtype, np.dtype)
    assert "mgb_dtype" in qdtype.metadata

    # The scale must be readable both from the raw metadata and via the helper.
    np.testing.assert_allclose(qdtype.metadata["mgb_dtype"]["scale"], scale)
    assert is_quantize(qdtype)
    np.testing.assert_allclose(get_scale(qdtype), scale)
def test_conv_bias_int4():
    """Trace a 4-bit quantized conv-bias-activation and check its graph dump.

    A symbolic trace with ``capture_as_const=True`` wraps a single
    ``F.quantized.conv_bias_activation`` call (quint4 output, ``"relu"``
    nonlinearity). The traced function is run once on random quint4 input,
    qint4 weight and qint32 bias, and the function, its inputs and its result
    are handed to ``check_pygraph_dump``.
    """

    @trace(symbolic=True, capture_as_const=True)
    def fwd(inp, weight, bias):
        return F.quantized.conv_bias_activation(
            inp,
            weight,
            bias,
            dtype=dtype.quint4(scale=1.0, zero_point=0),
            nonlinear_mode="relu",
        )

    # Random data is drawn in input -> weight -> bias order.
    inp_data = np.random.random((1, 3, 64, 64))
    inp = Tensor(inp_data, dtype=dtype.quint4(scale=1.0, zero_point=0))

    weight_data = np.random.random((32, 3, 3, 3))
    weight = Tensor(weight_data, dtype=dtype.qint4(scale=1.0))

    bias_data = np.random.random((1, 32, 1, 1))
    bias = Tensor(bias_data, dtype=dtype.qint32(scale=1.0))

    result = fwd(inp, weight, bias)
    check_pygraph_dump(fwd, [inp, weight, bias], [result])
def test_dtype_int4_ffi_handle():
    """Round-trip quint4 and qint4 data through a compiled identity function.

    Random float32 data in ``[-1, 4)`` is converted to each 4-bit quantized
    dtype, passed through ``_get_compiled_result`` with an identity
    ``calc_func``, and the dequantized output is compared against the
    dequantized input. ``_check_result_attr`` is additionally called on each
    output with the dtype and its expected name.
    """
    device = "xpux"
    shape = (3, 3, 3)
    data = np.random.random(shape).astype(np.float32) * 5 - 1

    def identity(x):
        return x

    # Unsigned 4-bit case: scale 0.01, zero point 7.
    dtype = quint4(0.01, 7)
    inp = convert_to_quint4(data, dtype)
    oup = _get_compiled_result(inp, dtype, shape, device, calc_func=identity)
    _check_result_attr(oup, dtype, "quint4")
    np.testing.assert_allclose(convert_from_quint4(oup), convert_from_quint4(inp))

    # Signed 4-bit case: scale 0.01, no zero point.
    dtype = qint4(0.01)
    inp = convert_to_qint4(data, dtype)
    oup = _get_compiled_result(inp, dtype, shape, device, calc_func=identity)
    _check_result_attr(oup, dtype, "qint4", is_unsigned=False)
    np.testing.assert_allclose(convert_from_qint4(oup), convert_from_qint4(inp))
def test_conv_bias_int4():
    """Compare quantized 4-bit conv-bias against a float32 ``conv2d`` reference.

    For several shape/stride/padding configurations, the result of
    ``F.quantized.conv_bias_activation`` on quint4 input and qint4 weight is
    checked against ``F.conv2d`` (optionally followed by ``F.relu``) run on
    the same tensors cast to float32. The reference is cast to the quint4
    output dtype and back before comparison, and the tolerance is one output
    quantization step (``atol=outp_scale``).
    """
    inp_scale = 1.5
    w_scale = 2.5
    outp_scale = 1.5
    inp_dtype = dtype.quint4(inp_scale, 0)
    w_dtype = dtype.qint4(w_scale)
    b_dtype = dtype.qint32(inp_scale * w_scale)
    out_dtype = dtype.quint4(outp_scale, 0)

    def run(
        N,
        IC,
        OC,
        IH,
        IW,
        KH,
        KW,
        PH,
        PW,
        SH,
        SW,
        has_bias=True,
        nonlinear_mode="identity",
    ):
        """Run one configuration.

        Input is ``(N, IC, IH, IW)``, weight is ``(OC, IC, KH, KW)``, bias is
        ``(1, OC, 1, 1)``; ``(PH, PW)`` is the padding and ``(SH, SW)`` the
        stride. ``has_bias`` selects whether the bias contributes, and
        ``nonlinear_mode`` is forwarded to the quantized op (``"relu"`` also
        applies ``F.relu`` to the reference).
        """
        # Draw the raw data in input -> weight -> bias order.
        raw_inp = np.random.normal(size=(N, IC, IH, IW))
        raw_w = np.random.normal(size=(OC, IC, KH, KW))
        raw_b = np.random.normal(size=(1, OC, 1, 1))

        scale_inp = dtype.get_scale(inp_dtype)
        scale_w = dtype.get_scale(w_dtype)
        scale_b = dtype.get_scale(b_dtype)

        # Scale the raw values and convert them to the quantized dtypes.
        q_inp = dtype.convert_to_quint4(raw_inp * scale_inp, inp_dtype)
        q_w = dtype.convert_to_qint4(raw_w * scale_w, w_dtype)
        q_b = dtype.convert_to_qint32(raw_b * scale_b, b_dtype)

        inp_uint4 = mge.Tensor(q_inp, dtype=inp_dtype)
        w_int4 = mge.Parameter(q_w, dtype=w_dtype)
        b_int32 = mge.Parameter(q_b, dtype=b_dtype)

        # Float32 views of the same tensors feed the reference path.
        inp_fp32 = inp_uint4.astype("float32")
        w_fp32 = w_int4.astype("float32")
        b_fp32 = b_int32.astype("float32")

        def run_conv2d(inp, w, b):
            conv_out = F.conv2d(
                inp,
                w,
                b if has_bias else None,
                stride=(SH, SW),
                padding=(PH, PW),
            )
            return F.relu(conv_out) if nonlinear_mode == "relu" else conv_out

        def run_conv_bias(inp, w, b):
            if not has_bias:
                # The quantized op is still given a bias tensor: all zeros.
                b = mge.Parameter(np.zeros_like(b.numpy()))
            return F.quantized.conv_bias_activation(
                inp,
                w,
                b,
                stride=(SH, SW),
                padding=(PH, PW),
                dtype=out_dtype,
                nonlinear_mode=nonlinear_mode,
            )

        reference = run_conv2d(inp_fp32, w_fp32, b_fp32)
        # Round-trip through the output dtype before comparing.
        reference = reference.astype(out_dtype).astype("float32")
        actual = run_conv_bias(inp_uint4, w_int4, b_int32).astype("float32")

        reference = F.flatten(reference)
        actual = F.flatten(actual)
        np.testing.assert_allclose(actual.numpy(), reference.numpy(), atol=outp_scale)

    # Each entry is the positional argument list for one ``run`` call.
    cases = [
        (1, 4, 4, 24, 33, 1, 1, 2, 3, 1, 1, False),
        (10, 12, 24, 46, 46, 1, 1, 2, 1, 3, 1, False),
        (10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, False),
        (1, 4, 4, 24, 33, 1, 1, 2, 3, 1, 1),
        (10, 12, 24, 46, 46, 1, 1, 2, 1, 3, 1),
        (10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2),
        (10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, False, "relu"),
        (10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, True, "relu"),
    ]
    for case in cases:
        run(*case)
def test_dtype_qint4(): dt = qint4(0.01) assert isinstance(dt, np.dtype) assert "mgb_dtype" in dt.metadata np.testing.assert_allclose(dt.metadata["mgb_dtype"]["scale"], 0.01) assert is_quantize(dt) np.testing.assert_allclose(get_scale(dt), 0.01) @pytest.mark.parametrize( "dtype, dtype_name", [ (quint4(0.01, 5), "quint4"), (qint4(0.01), "qint4"), (quint8(0.01, 135), "quint8"), (qint8(0.01), "qint8"), ], ) def test_dtype_qint_mgb_ffi_handle(dtype, dtype_name): def identity(x): return x convert_to_dtype = eval("convert_to_%s" % dtype_name) convert_from_dtype = eval("convert_from_%s" % dtype_name) device = "xpux" shape = (3, 3, 3) data = np.random.random(shape).astype(np.float32) * 5 - 1 inp = convert_to_dtype(data, dtype)