def test_quantize(self):
    pred_net = caffe2_pb2.NetDef()
    pred_net.name = "pred"
    pred_net.external_input.append("X")
    pred_net.external_output.append("Y")
    x_scale = 0.10000000149011612
    pred_net.op.add().CopyFrom(
        core.CreateOperator(
            "Int8Quantize", ["X"], ["Y"], Y_scale=x_scale, Y_zero_point=0))
    print(pred_net)

    X = np.asarray([[1, 0], [0, 1]]).astype(np.float32)
    workspace.FeedBlob("X", X)
    workspace.RunNetOnce(pred_net)
    Y_ref = workspace.FetchInt8Blob("Y")
    workspace.ResetWorkspace()

    pred_net_onnxified = onnxifi_caffe2_net(
        pred_net,
        {"X": [2, 2]},
        debug=True,
        adjust_batch=False,
        use_onnx=False,
    )
    num_onnxified_ops = sum(
        1 if o.type == "Onnxifi" else 0 for o in pred_net_onnxified.op)
    np.testing.assert_equal(num_onnxified_ops, 1)

    workspace.FeedBlob("X", X)
    workspace.CreateNet(pred_net_onnxified)
    workspace.RunNet(pred_net_onnxified.name)
    Y_glow = workspace.FetchInt8Blob("Y")
    np.testing.assert_equal(Y_ref.data, Y_glow.data)

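# Note: workspace.FetchInt8Blob returns the quantized tensor together with its
# quantization parameters, so the checks above can compare .data, .scale and
# .zero_point separately. As a hand-checkable sketch (assuming the usual affine
# uint8 scheme q = round(x / scale) + zero_point, which may differ in rounding
# details from the operator itself), the dequantized view of such a blob is:
def _dequantize_int8_blob_sketch(int8_blob):
    # int8_blob: result of workspace.FetchInt8Blob, with fields data, scale, zero_point
    return int8_blob.scale * (int8_blob.data.astype(np.float32) - int8_blob.zero_point)
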
def test_batch_permutation(self, N, gc, dc):
    X = np.round(np.random.rand(N, 10, 20, 3) * 255).astype(np.float32)
    indices = np.arange(N).astype(np.int32)
    np.random.shuffle(indices)

    quantize = core.CreateOperator("Quantize", ["X"], ["X_q"], engine="DNNLOWP")
    batch_perm = core.CreateOperator(
        "BatchPermutation", ["X_q", "indices"], ["Y_q"], engine="DNNLOWP")
    net = core.Net("test_net")
    net.Proto().op.extend([quantize, batch_perm])

    workspace.FeedBlob("X", X)
    workspace.FeedBlob("indices", indices)
    workspace.RunNetOnce(net)
    X_q = workspace.FetchInt8Blob("X_q").data
    Y_q = workspace.FetchInt8Blob("Y_q").data

    def batch_permutation_ref(X, indices):
        return np.array([X[i] for i in indices])

    Y_q_ref = batch_permutation_ref(X_q, indices)
    np.testing.assert_allclose(Y_q, Y_q_ref)

def test_resize_nearest(self, N, H, W, C, scale_w, scale_h, gc, dc):
    X = np.round(np.random.rand(N, H, W, C) * 255).astype(np.float32)

    quantize = core.CreateOperator("Quantize", ["X"], ["X_q"], engine="DNNLOWP")
    resize_nearest = core.CreateOperator(
        "Int8ResizeNearest",
        ["X_q"],
        ["Y_q"],
        width_scale=scale_w,
        height_scale=scale_h,
        engine="DNNLOWP",
    )
    net = core.Net("test_net")
    net.Proto().op.extend([quantize, resize_nearest])

    workspace.FeedBlob("X", X)
    workspace.RunNetOnce(net)
    X_q = workspace.FetchInt8Blob("X_q").data
    Y_q = workspace.FetchInt8Blob("Y_q").data

    def resize_nearest_ref(X):
        outH = np.int32(H * scale_h)
        outW = np.int32(W * scale_w)
        outH_idxs, outW_idxs = np.meshgrid(
            np.arange(outH), np.arange(outW), indexing="ij")
        inH_idxs = np.minimum(outH_idxs / scale_h, H - 1).astype(np.int32)
        inW_idxs = np.minimum(outW_idxs / scale_w, W - 1).astype(np.int32)
        Y = X[:, inH_idxs, inW_idxs, :]
        return Y

    Y_q_ref = resize_nearest_ref(X_q)
    np.testing.assert_allclose(Y_q, Y_q_ref)

def test_dnnlowp_quantize(self, size, is_empty, absorb, gc, dc):
    if is_empty:
        size = 0
    min_ = -10.0
    max_ = 20.0
    X = (np.random.rand(size) * (max_ - min_) + min_).astype(np.float32)
    X_min = 0 if X.size == 0 else X.min()
    X_max = 1 if X.size == 0 else X.max()
    X_scale = (max(X_max, 0) - min(X_min, 0)) / 255
    X_zero = np.round(-X_min / X_scale)

    op_type_list = ["Quantize", "Int8Quantize"]
    engine = "DNNLOWP"

    for op_type in op_type_list:
        net = core.Net("test_net")

        quantize = core.CreateOperator(
            op_type, ["X"], ["X_q"], engine=engine, device_option=gc
        )
        net.Proto().op.extend([quantize])

        dnnlowp_pybind11.CreateInt8QuantParamsBlob(
            "quant_param", float(X_scale), int(X_zero)
        )
        quantize_2 = core.CreateOperator(
            op_type,
            ["X", "quant_param"],
            ["X_q_2"],
            engine=engine,
            device_option=gc,
        )
        net.Proto().op.extend([quantize_2])

        if absorb:
            net_str = dnnlowp_pybind11.freeze_quantization_params(
                net.Proto().SerializeToString())
            net.Proto().ParseFromString(net_str)

        workspace.FeedBlob("X", X, device_option=gc)
        workspace.RunNetOnce(net)
        X_q = workspace.FetchInt8Blob("X_q")[0]
        X_q_2 = workspace.FetchInt8Blob("X_q_2")[0]

        # Dequantize results and measure quantization error against the input
        X_dq = X_scale * (X_q - X_zero)
        X_dq_2 = X_scale * (X_q_2 - X_zero)

        # The bound would be X_scale / 2 in exact arithmetic; divide by 1.9
        # instead to allow for finite floating-point precision.
        atol = X_scale / 1.9
        np.testing.assert_allclose(X_dq, X, atol=atol, rtol=0)
        np.testing.assert_allclose(X_dq_2, X, atol=atol, rtol=0)

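# The atol = X_scale / 1.9 tolerance above follows from the affine uint8 scheme:
# rounding to the nearest quantization step bounds the round-trip error by
# scale / 2 in exact arithmetic. A minimal reference of that scheme (a sketch,
# not the DNNLOWP operator implementation):
def _quantize_ref_sketch(X, scale, zero_point):
    # |X - scale * (q - zero_point)| <= scale / 2, up to float rounding
    return np.clip(np.round(X / scale) + zero_point, 0, 255).astype(np.uint8)
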
def _test_int8_quantize(self, n, m, rand_seed):
    note("n={}, m={}, rand_seed={}".format(n, m, rand_seed))
    np.random.seed(rand_seed)

    X_fp16 = np.random.rand(n, m).astype(np.float16)
    X_fp32 = X_fp16.astype(np.float32)
    scale, zero_point = self._get_scale_zp(X_fp32)
    print("X scale zp", scale, zero_point)

    ref_net = core.Net("net")
    ref_net.Int8QuantizeNNPI(
        ["X"], ["X_int8"], Y_scale=scale, Y_zero_point=zero_point)
    ref_net.Int8DequantizeNNPI(["X_int8"], ["Y"])
    ref_net.Proto().external_output.extend(["X_int8"])

    # run ref net
    workspace.ResetWorkspace()
    workspace.FeedBlob("X", X_fp32)
    workspace.RunNetOnce(ref_net)
    X_int8 = workspace.FetchInt8Blob("X_int8")
    print("after running ", X_int8)
    Y_fbgemm = workspace.FetchBlob("Y")

    # run onnxifi net
    workspace.ResetWorkspace()
    workspace.FeedBlob("X", X_fp32)
    ref_net.Proto().op[0].type = "Int8Quantize"
    ref_net.Proto().op[1].type = "Int8Dequantize"
    net_onnxified = onnxifi_caffe2_net(
        ref_net.Proto(),
        {},
        debug=True,
        adjust_batch=False,
        use_onnx=False,
        weight_names=[],
    )
    num_onnxified_ops = sum(
        1 if o.type == "Onnxifi" else 0 for o in net_onnxified.op)
    # np.testing.assert_equal(num_onnxified_ops, 1)
    workspace.CreateNet(net_onnxified)
    workspace.RunNet(net_onnxified.name)
    X_int8_glow = workspace.FetchInt8Blob("X_int8")
    Y_glow = workspace.FetchBlob("Y")
    np.testing.assert_allclose(Y_fbgemm, Y_glow)

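# _test_int8_quantize depends on a _get_scale_zp helper that is not shown in
# this section. A plausible minimal sketch, assuming it maps the observed tensor
# range (extended to include 0) onto uint8 [0, 255]; the real helper may round
# or clamp differently:
def _get_scale_zp_sketch(tensor):
    tensor_max = max(float(np.max(tensor)), 0.0)
    tensor_min = min(float(np.min(tensor)), 0.0)
    scale = (tensor_max - tensor_min) / 255.0
    if scale < 1e-6:
        scale = 1e-6
    zero_point = int(round(np.clip(-tensor_min / scale, 0, 255)))
    return np.float32(scale), zero_point
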
def fetch_any_blob(name):
    bb = None
    try:
        bb = workspace.FetchBlob(name)
    except TypeError:
        bb = workspace.FetchInt8Blob(name)
    except Exception as e:
        logger.error("Get blob {} error: {}".format(name, e))

    return bb

def _get_blob(name):
    bb = None
    try:
        bb = workspace.FetchBlob(name)
    except TypeError:
        bb = workspace.FetchInt8Blob(name)
    except Exception as e:
        print("Get blob {} error: {}".format(name, e))

    return bb

def Skip_test_tanhquantize(self, scale, zp, size, rand_seed):
    np.random.seed(rand_seed)
    workspace.ResetWorkspace()

    pred_net = caffe2_pb2.NetDef()
    pred_net.name = "ref"
    pred_net.external_input.append("X")
    pred_net.external_output.append("Y_q")
    pred_net.op.add().CopyFrom(
        core.CreateOperator("Tanh", ["X"], ["Y"])
    )
    pred_net.op.add().CopyFrom(
        core.CreateOperator(
            "Int8Quantize", ["Y"], ["Y_q"], Y_scale=scale, Y_zero_point=zp
        )
    )

    X = np.linspace(-1, 1, size).astype(np.float16).astype(np.float32)

    pred_net_onnxified = onnxifi_caffe2_net(
        pred_net,
        {"X": X.shape},
        debug=True,
        adjust_batch=False,
        use_onnx=False,
    )
    num_onnxified_ops = sum(
        1 if o.type == "Onnxifi" else 0 for o in pred_net_onnxified.op
    )
    np.testing.assert_equal(num_onnxified_ops, 1)

    workspace.FeedBlob("X", X)
    workspace.CreateNet(pred_net_onnxified)
    workspace.RunNet(pred_net_onnxified.name)
    Y_glow = workspace.FetchInt8Blob("Y_q")

    ref_net = caffe2_pb2.NetDef()
    ref_net.name = "ref"
    ref_net.external_input.append("X")
    ref_net.external_output.append("Y_q")
    ref_net.op.add().CopyFrom(
        core.CreateOperator(
            "TanhQuantFakeFp16NNPI", ["X"], ["Y_q"], Y_scale=scale, Y_zero_point=zp
        )
    )

    workspace.CreateNet(ref_net)
    workspace.RunNet(ref_net.name)
    Y_ref = workspace.FetchInt8Blob("Y_q")

    if not np.array_equal(Y_ref.data, Y_glow.data) or \
            not Y_ref.scale == Y_glow.scale or \
            not Y_ref.zero_point == Y_glow.zero_point:
        print_test_debug_info(
            "tanhfusion",
            {
                "scale": scale,
                "zp": zp,
                "input": X,
                "ideal nonquant": np.tanh(X),
                "Y_glow": Y_glow,
                "Y_c2": Y_ref,
            }
        )
        assert(0)

def test_fused_ln_quantize(self, seed, batch_size, size, epsilon, elementwise_affine):
    np.random.seed(seed)

    # Reset the workspace
    workspace.ResetWorkspace()
    axis = 1

    dims = np.array([batch_size, size])
    X = np.random.uniform(size=dims).astype(np.float32) - 0.5
    gamma = np.random.randn(*X.shape[axis:]).astype(np.float32)
    beta = np.random.randn(*X.shape[axis:]).astype(np.float32)

    Y = self._layernorm_transform(X)
    scale, zp = self._get_scale_zp(Y)

    pred_net = caffe2_pb2.NetDef()
    pred_net.name = "pred"
    pred_net.external_input.extend(["X", "gamma", "beta"])
    pred_net.external_output.extend(["Y_q"])
    pred_net.op.add().CopyFrom(
        core.CreateOperator(
            "LayerNorm",
            ["X", "gamma", "beta"] if elementwise_affine else ["X"],
            ["Y", "mean", "rstd"],
            axis=axis,
            epsilon=epsilon,
            elementwise_affine=elementwise_affine))
    pred_net.op.add().CopyFrom(
        core.CreateOperator(
            "Int8Quantize", ["Y"], ["Y_q"], Y_scale=scale, Y_zero_point=zp))
    print(pred_net)

    pred_net_ref = caffe2_pb2.NetDef()
    pred_net_ref.name = "pred_ref"
    pred_net_ref.external_input.extend(["X", "gamma", "beta"])
    pred_net_ref.external_output.extend(["Y_q"])
    pred_net_ref.op.add().CopyFrom(
        core.CreateOperator(
            "LayerNormInt8QuantizeFakeNNPI",
            ["X", "gamma", "beta"] if elementwise_affine else ["X"],
            ["Y_q", "mean", "rstd"],
            axis=axis,
            epsilon=epsilon,
            elementwise_affine=elementwise_affine,
            Y_scale=scale,
            Y_zero_point=zp))

    shape_hints = {"X": X.shape, "gamma": gamma.shape, "beta": beta.shape}
    pred_net_onnxified = onnxifi_caffe2_net(
        pred_net,
        shape_hints,
        debug=True,
        adjust_batch=True,
        use_onnx=False)
    num_onnxified_ops = sum(
        1 if o.type == "Onnxifi" else 0 for o in pred_net_onnxified.op)
    np.testing.assert_equal(num_onnxified_ops, 1)

    workspace.FeedBlob("X", X)
    workspace.FeedBlob("gamma", gamma)
    workspace.FeedBlob("beta", beta)

    workspace.CreateNet(pred_net_ref)
    workspace.CreateNet(pred_net_onnxified)

    workspace.RunNet(pred_net_ref.name)
    Y_c2 = workspace.FetchInt8Blob("Y_q")

    workspace.RunNet(pred_net_onnxified.name)
    Y_glow = workspace.FetchInt8Blob("Y_q")

    if not np.allclose(Y_glow.data, Y_c2.data) or \
            Y_glow.scale != Y_c2.scale or Y_glow.zero_point != Y_c2.zero_point:
        diff_Y = np.abs(
            Y_glow.data.astype(np.float32) - Y_c2.data.astype(np.float32))
        print_test_debug_info(
            "layernorm",
            {
                "seed": seed,
                "size": size,
                "batch_size": batch_size,
                "epsilon": epsilon,
                "gamma": gamma,
                "beta": beta,
                "elementwise_affine": elementwise_affine,
                "X": X,
                "Y_glow": Y_glow,
                "Y_c2": Y_c2,
                "diff_Y": diff_Y,
            })
        assert (0)

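# test_fused_ln_quantize uses a _layernorm_transform helper (not shown here) only
# to estimate the output range when choosing scale/zp. A minimal sketch, assuming
# a plain per-row layer norm without the gamma/beta affine step:
def _layernorm_transform_sketch(X, axis=1, epsilon=1e-4):
    mean = np.mean(X, axis=axis, keepdims=True)
    std = np.sqrt(np.var(X, axis=axis, keepdims=True) + epsilon)
    return (X - mean) / std
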
def test_deq_swish_quant(self):
    workspace.ResetWorkspace()
    n = 256

    X_fp32 = np.linspace(-20.5, 8., num=n).astype(np.float32).reshape(1, n)
    Y_fp32 = self._swish(X_fp32)
    X_scale, X_zero_point = self._get_scale_zp(X_fp32)
    Y_scale, Y_zero_point = self._get_scale_zp(Y_fp32)

    W_fp32 = np.identity(n, dtype=np.float32)
    b_fp32 = np.zeros((n,), dtype=np.float32)

    workspace.FeedBlob("X", X_fp32)
    workspace.FeedBlob("W", W_fp32)
    workspace.FeedBlob("b", b_fp32)

    workspace.RunOperatorOnce(
        core.CreateOperator(
            "Int8FCPackWeight",
            ["W"],
            ["W_int8"],
            engine="DNNLOWP",
            save_unpacked_weights=True,
            in_scale=X_scale,
        )
    )

    ref_net = core.Net("net")
    ref_net.Int8QuantizeNNPI(
        ["X"], ["X_int8"], Y_scale=X_scale, Y_zero_point=X_zero_point
    )
    ref_net.Int8FCFakeAcc32NNPI(
        ["X_int8", "W_int8", "b"],
        ["U_int8"],
        Y_scale=X_scale,
        Y_zero_point=X_zero_point,
    )
    ref_net.Int8DequantizeNNPI(["U_int8"], ["U_fp16"])
    ref_net.SwishFakeFp16NNPI(["U_fp16"], ["Y_fp16"])
    ref_net.Int8QuantizeNNPI(
        ["Y_fp16"], ["Y"], Y_scale=Y_scale, Y_zero_point=Y_zero_point
    )
    ref_net.Proto().external_output.append("Y")

    # run ref_net
    workspace.RunNetOnce(ref_net)
    Y_fbgemm = workspace.FetchInt8Blob("Y")

    # run onnxifi net
    ref_net.Proto().op[0].type = "Int8Quantize"
    ref_net.Proto().op[1].type = "Int8FC"
    ref_net.Proto().op[2].type = "Int8Dequantize"
    ref_net.Proto().op[3].type = "Swish"
    ref_net.Proto().op[4].type = "Int8Quantize"
    net_onnxified = onnxifi_caffe2_net(
        ref_net.Proto(),
        {},
        debug=True,
        adjust_batch=False,
        use_onnx=False,
        weight_names=["W_int8", "b"],
    )
    num_onnxified_ops = sum(
        1 if o.type == "Onnxifi" else 0 for o in net_onnxified.op
    )
    np.testing.assert_equal(num_onnxified_ops, 1)
    # TODO: add an assertion to check that the optimized net
    # fused Dequantize->Swish->Quantize into QuantizedSwish
    workspace.CreateNet(net_onnxified)
    workspace.RunNet(net_onnxified.name)
    Y_glow = workspace.FetchInt8Blob("Y")

    Swish_Ips = workspace.FetchBlob("U_fp16")
    Swish_Ops = workspace.FetchBlob("Y_fp16")
    diff_Y = np.abs(Y_glow.data.astype(np.int32) - Y_fbgemm.data.astype(np.int32))
    num_mismatches = np.count_nonzero(diff_Y)
    max_diff = np.max(diff_Y)
    # TODO: Debug the mismatch and make the test pass with max_diff == 0
    if max_diff > 1:
        print_test_debug_info(
            "QuantizedSwish",
            {
                "X": X_fp32,
                "Swish_Ips": Swish_Ips,
                "Swish_Ops": Swish_Ops,
                "Y_fbgemm": Y_fbgemm,
                "Y_glow": Y_glow,
                "diff": diff_Y,
                "max_diff": max_diff,
                "num_mismatches": num_mismatches,
            },
        )
        assert 0

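# test_deq_swish_quant derives its output quantization range from a _swish helper
# that is not shown in this section. A minimal sketch, assuming the standard
# swish/SiLU definition x * sigmoid(x):
def _swish_sketch(x):
    return x / (1.0 + np.exp(-x))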