示例#1
0
 def test_quantize(self):
     """Check that Int8Quantize yields the same data natively and via Onnxifi.

     A one-op net quantizing a 2x2 float32 matrix is run as-is to obtain the
     reference blob, then passed through ``onnxifi_caffe2_net`` and run again.
     The transformed net must contain exactly one ``Onnxifi`` op and its
     quantized data must equal the reference data.
     """
     quant_scale = 0.10000000149011612
     quantize_op = core.CreateOperator(
         "Int8Quantize",
         ["X"],
         ["Y"],
         Y_scale=quant_scale,
         Y_zero_point=0,
     )

     net_def = caffe2_pb2.NetDef()
     net_def.name = "pred"
     net_def.external_input.append("X")
     net_def.external_output.append("Y")
     net_def.op.add().CopyFrom(quantize_op)
     print(net_def)

     inputs = np.array([[1, 0], [0, 1]], dtype=np.float32)

     # Reference pass: run the untouched net once and keep its output.
     workspace.FeedBlob("X", inputs)
     workspace.RunNetOnce(net_def)
     expected = workspace.FetchInt8Blob("Y")

     # Start from a clean workspace before running the transformed net.
     workspace.ResetWorkspace()
     lowered = onnxifi_caffe2_net(
         net_def,
         {"X": [2, 2]},  # matches the shape of the array fed as "X"
         debug=True,
         adjust_batch=False,
         use_onnx=False,
     )
     onnxifi_ops = [op for op in lowered.op if op.type == "Onnxifi"]
     np.testing.assert_equal(len(onnxifi_ops), 1)

     workspace.FeedBlob("X", inputs)
     workspace.CreateNet(lowered)
     workspace.RunNet(lowered.name)
     actual = workspace.FetchInt8Blob("Y")
     np.testing.assert_equal(expected.data, actual.data)
    def test_batch_permutation(self, N, gc, dc):
        """Compare DNNLOWP BatchPermutation against a NumPy reference.

        A random float32 tensor of shape (N, 10, 20, 3) holding whole numbers
        in [0, 255] is quantized, its leading axis is reordered by a shuffled
        index vector, and the fetched result is compared with the same
        reordering applied in NumPy to the fetched quantized input.

        Args:
            N: size of the leading dimension of the input.
            gc: not used by this body.
            dc: not used by this body.
        """

        def gather_rows(tensor, order):
            # Stack the leading-axis slices in the requested order.
            return np.array([tensor[i] for i in order])

        data = np.round(np.random.rand(N, 10, 20, 3) * 255).astype(np.float32)
        perm = np.arange(N, dtype=np.int32)
        np.random.shuffle(perm)

        ops = [
            core.CreateOperator("Quantize", ["X"], ["X_q"], engine="DNNLOWP"),
            core.CreateOperator(
                "BatchPermutation",
                ["X_q", "indices"],
                ["Y_q"],
                engine="DNNLOWP",
            ),
        ]
        test_net = core.Net("test_net")
        test_net.Proto().op.extend(ops)

        workspace.FeedBlob("X", data)
        workspace.FeedBlob("indices", perm)
        workspace.RunNetOnce(test_net)
        quantized_in = workspace.FetchInt8Blob("X_q").data
        quantized_out = workspace.FetchInt8Blob("Y_q").data

        expected = gather_rows(quantized_in, perm)
        np.testing.assert_allclose(quantized_out, expected)
    def test_resize_nearest(self, N, H, W, C, scale_w, scale_h, gc, dc):
        """Compare DNNLOWP Int8ResizeNearest against a NumPy reference.

        A random float32 tensor of shape (N, H, W, C) holding whole numbers in
        [0, 255] is quantized and resized; the fetched output is compared with
        a nearest-neighbour lookup done in NumPy on the fetched quantized
        input.

        Args:
            N, H, W, C: dimensions of the random input, in that axis order.
            scale_w: ``width_scale`` argument of the resize op.
            scale_h: ``height_scale`` argument of the resize op.
            gc: not used by this body.
            dc: not used by this body.
        """

        def nearest_ref(src):
            # Output extent is the scaled input extent truncated to int32.
            out_h = np.int32(H * scale_h)
            out_w = np.int32(W * scale_w)
            rows, cols = np.meshgrid(
                np.arange(out_h), np.arange(out_w), indexing="ij"
            )
            # Map each output coordinate back to a source coordinate, clamped
            # to the last valid row/column before truncation.
            src_rows = np.minimum(rows / scale_h, H - 1).astype(np.int32)
            src_cols = np.minimum(cols / scale_w, W - 1).astype(np.int32)
            return src[:, src_rows, src_cols, :]

        data = np.round(np.random.rand(N, H, W, C) * 255).astype(np.float32)

        ops = [
            core.CreateOperator("Quantize", ["X"], ["X_q"], engine="DNNLOWP"),
            core.CreateOperator(
                "Int8ResizeNearest",
                ["X_q"],
                ["Y_q"],
                width_scale=scale_w,
                height_scale=scale_h,
                engine="DNNLOWP",
            ),
        ]
        test_net = core.Net("test_net")
        test_net.Proto().op.extend(ops)

        workspace.FeedBlob("X", data)
        workspace.RunNetOnce(test_net)
        quantized_in = workspace.FetchInt8Blob("X_q").data
        quantized_out = workspace.FetchInt8Blob("Y_q").data

        np.testing.assert_allclose(quantized_out, nearest_ref(quantized_in))
    def test_dnnlowp_quantize(self, size, is_empty, absorb, gc, dc):
        """Check the quantization error of DNNLOWP Quantize / Int8Quantize.

        For each of the two op types a net with two quantize ops is run on the
        same random input: one op receives only ``X``, the other additionally
        receives a ``quant_param`` blob built from the scale and zero point
        computed here.  Both outputs are dequantized with those same
        parameters and must lie within ``X_scale / 1.9`` of the input.

        Args:
            size: number of input elements (ignored when ``is_empty``).
            is_empty: when true, an empty input is used.
            absorb: when true, the serialized net is passed through
                ``dnnlowp_pybind11.freeze_quantization_params`` and parsed
                back before running.
            gc: device option used for the ops and for feeding ``X``.
            dc: not used by this body.
        """
        num_elems = 0 if is_empty else size
        low, high = -10.0, 20.0
        X = (np.random.rand(num_elems) * (high - low) + low).astype(np.float32)

        # An empty input has no min/max; fall back to the range [0, 1].
        if X.size == 0:
            X_min, X_max = 0, 1
        else:
            X_min, X_max = X.min(), X.max()
        X_scale = (max(X_max, 0) - min(X_min, 0)) / 255
        X_zero = np.round(-X_min / X_scale)

        engine = "DNNLOWP"
        for op_type in ("Quantize", "Int8Quantize"):
            net = core.Net("test_net")

            # Op without explicit quantization parameters.
            plain_op = core.CreateOperator(
                op_type, ["X"], ["X_q"], engine=engine, device_option=gc
            )

            # Op that takes its parameters from the "quant_param" blob.
            dnnlowp_pybind11.CreateInt8QuantParamsBlob(
                "quant_param", float(X_scale), int(X_zero)
            )
            param_op = core.CreateOperator(
                op_type,
                ["X", "quant_param"],
                ["X_q_2"],
                engine=engine,
                device_option=gc,
            )
            net.Proto().op.extend([plain_op, param_op])

            if absorb:
                frozen = dnnlowp_pybind11.freeze_quantization_params(
                    net.Proto().SerializeToString())
                net.Proto().ParseFromString(frozen)

            workspace.FeedBlob("X", X, device_option=gc)
            workspace.RunNetOnce(net)
            X_q = workspace.FetchInt8Blob("X_q")[0]
            X_q_2 = workspace.FetchInt8Blob("X_q_2")[0]

            # Exact math would allow half a quantization step of error; 1.9 is
            # used instead of 2 to leave room for floating-point rounding.
            atol = X_scale / 1.9

            # Dequantize each result and measure its error against the input.
            for quantized in (X_q, X_q_2):
                dequantized = X_scale * (quantized - X_zero)
                np.testing.assert_allclose(dequantized, X, atol=atol, rtol=0)
示例#5
0
    def _test_int8_quantize(self, n, m, rand_seed):
        """Compare an NNPI quantize/dequantize round trip with its onnxified form.

        A net made of ``Int8QuantizeNNPI`` followed by ``Int8DequantizeNNPI``
        is run to obtain the reference ``Y``.  The same proto then has its op
        types rewritten to ``Int8Quantize`` / ``Int8Dequantize``, is passed
        through ``onnxifi_caffe2_net`` and run again; both ``Y`` blobs must be
        close.

        Args:
            n: number of input rows.
            m: number of input columns.
            rand_seed: seed for NumPy's global random generator.
        """
        note("n={}, m={}, rand_seed={}".format(n, m, rand_seed))
        np.random.seed(rand_seed)

        # The float32 input only holds values that went through float16.
        X_fp32 = np.random.rand(n, m).astype(np.float16).astype(np.float32)
        scale, zero_point = self._get_scale_zp(X_fp32)
        print("X scale zp", scale, zero_point)

        ref_net = core.Net("net")
        ref_net.Int8QuantizeNNPI(
            ["X"],
            ["X_int8"],
            Y_scale=scale,
            Y_zero_point=zero_point,
        )
        ref_net.Int8DequantizeNNPI(["X_int8"], ["Y"])
        ref_net.Proto().external_output.extend(["X_int8"])

        # Reference run.
        workspace.ResetWorkspace()
        workspace.FeedBlob("X", X_fp32)
        workspace.RunNetOnce(ref_net)

        quantized_ref = workspace.FetchInt8Blob("X_int8")
        print("after running ", quantized_ref)
        Y_reference = workspace.FetchBlob("Y")

        # Onnxified run: reuse the proto with the op types swapped in place.
        workspace.ResetWorkspace()
        workspace.FeedBlob("X", X_fp32)
        net_proto = ref_net.Proto()
        net_proto.op[0].type = "Int8Quantize"
        net_proto.op[1].type = "Int8Dequantize"

        net_onnxified = onnxifi_caffe2_net(
            net_proto,
            {},
            debug=True,
            adjust_batch=False,
            use_onnx=False,
            weight_names=[],
        )

        # NOTE: the count is computed but the check on it is disabled:
        # np.testing.assert_equal(num_onnxified_ops, 1)
        num_onnxified_ops = len(
            [op for op in net_onnxified.op if op.type == "Onnxifi"]
        )

        workspace.CreateNet(net_onnxified)
        workspace.RunNet(net_onnxified.name)
        # Fetched but not compared; the call is kept so the fetch still runs.
        workspace.FetchInt8Blob("X_int8")
        Y_onnxified = workspace.FetchBlob("Y")
        np.testing.assert_allclose(Y_reference, Y_onnxified)
示例#6
0
def fetch_any_blob(name):
    """Fetch blob ``name``, falling back to the Int8 accessor on ``TypeError``.

    Returns:
        The result of ``workspace.FetchBlob(name)``.  If that call raises
        ``TypeError``, the result of ``workspace.FetchInt8Blob(name)``.  If it
        raises any other ``Exception``, the error is logged through
        ``logger.error`` and ``None`` is returned.

    An exception raised by the ``FetchInt8Blob`` fallback is not caught here.
    """
    try:
        return workspace.FetchBlob(name)
    except TypeError:
        return workspace.FetchInt8Blob(name)
    except Exception as err:
        logger.error("Get blob {} error: {}".format(name, err))
    return None
def _get_blob(name):
    """Fetch blob ``name``, falling back to the Int8 accessor on ``TypeError``.

    Returns:
        The result of ``workspace.FetchBlob(name)``.  If that call raises
        ``TypeError``, the result of ``workspace.FetchInt8Blob(name)``.  If it
        raises any other ``Exception``, the error is printed and ``None`` is
        returned.

    An exception raised by the ``FetchInt8Blob`` fallback is not caught here.
    """
    try:
        return workspace.FetchBlob(name)
    except TypeError:
        return workspace.FetchInt8Blob(name)
    except Exception as err:
        print("Get blob {} error: {}".format(name, err))
    return None
示例#8
0
    def Skip_test_tanhquantize(self, scale, zp, size, rand_seed):
        """Compare a lowered Tanh -> Int8Quantize net with TanhQuantFakeFp16NNPI.

        The input is ``size`` evenly spaced points in [-1, 1], rounded through
        float16.  The two-op net is transformed by ``onnxifi_caffe2_net`` and
        run; its ``Y_q`` blob is then compared (data, scale and zero point)
        with the ``Y_q`` produced by a single ``TanhQuantFakeFp16NNPI`` op.

        NOTE(review): the ``Skip_`` prefix presumably keeps the test runner
        from collecting this method -- confirm before relying on it.

        Args:
            scale: ``Y_scale`` passed to both quantizing ops.
            zp: ``Y_zero_point`` passed to both quantizing ops.
            size: number of input samples.
            rand_seed: seed for NumPy's global random generator.

        Raises:
            AssertionError: if the transformed net does not contain exactly
                one ``Onnxifi`` op, or if the two quantized outputs differ.
        """
        np.random.seed(rand_seed)

        workspace.ResetWorkspace()

        pred_net = caffe2_pb2.NetDef()
        # Kept distinct from the reference net's name ("ref") below: both nets
        # are created and run by name in the same workspace, so sharing one
        # name would make them collide.
        pred_net.name = "pred"
        pred_net.external_input.append("X")
        pred_net.external_output.append("Y_q")

        pred_net.op.add().CopyFrom(
            core.CreateOperator(
                "Tanh", ["X"], ["Y"]
            )
        )

        pred_net.op.add().CopyFrom(
            core.CreateOperator(
                "Int8Quantize", ["Y"], ["Y_q"], Y_scale=scale, Y_zero_point=zp
            )
        )

        X = np.linspace(-1, 1, size).astype(np.float16).astype(np.float32)

        pred_net_onnxified = onnxifi_caffe2_net(
            pred_net,
            {"X": X.shape},
            debug=True,
            adjust_batch=False,
            use_onnx=False,
        )
        num_onnxified_ops = sum(
            1 if o.type == "Onnxifi" else 0 for o in pred_net_onnxified.op
        )
        np.testing.assert_equal(num_onnxified_ops, 1)
        workspace.FeedBlob("X", X)
        workspace.CreateNet(pred_net_onnxified)
        workspace.RunNet(pred_net_onnxified.name)
        Y_glow = workspace.FetchInt8Blob("Y_q")

        ref_net = caffe2_pb2.NetDef()
        ref_net.name = "ref"
        ref_net.external_input.append("X")
        ref_net.external_output.append("Y_q")

        ref_net.op.add().CopyFrom(
            core.CreateOperator(
                "TanhQuantFakeFp16NNPI", ["X"], ["Y_q"], Y_scale=scale, Y_zero_point=zp
            )
        )

        # "X" is still in the workspace from the run above.
        workspace.CreateNet(ref_net)
        workspace.RunNet(ref_net.name)
        Y_ref = workspace.FetchInt8Blob("Y_q")

        outputs_match = (
            np.array_equal(Y_ref.data, Y_glow.data)
            and Y_ref.scale == Y_glow.scale
            and Y_ref.zero_point == Y_glow.zero_point
        )
        if not outputs_match:
            print_test_debug_info(
                "tanhfusion",
                {
                    "scale": scale,
                    "zp": zp,
                    "input": X,
                    "ideal nonquant": np.tanh(X),
                    "Y_glow": Y_glow,
                    "Y_c2": Y_ref,
                }
            )
            # Raised explicitly: a bare ``assert`` is removed under
            # ``python -O`` and would report no message.
            raise AssertionError(
                "tanhfusion: quantized output of the onnxified net differs "
                "from TanhQuantFakeFp16NNPI"
            )
示例#9
0
    def test_fused_ln_quantize(self, seed, batch_size, size, epsilon,
                               elementwise_affine):
        """Compare a lowered LayerNorm -> Int8Quantize net with the fused fake op.

        The prediction net (``LayerNorm`` followed by ``Int8Quantize``) is
        transformed by ``onnxifi_caffe2_net``; the reference net is a single
        ``LayerNormInt8QuantizeFakeNNPI`` op.  Both write ``Y_q``; the data,
        scale and zero point of the two results must agree.

        Args:
            seed: seed for NumPy's global random generator.
            batch_size: first dimension of the input ``X``.
            size: second dimension of ``X`` (and the length of gamma/beta).
            epsilon: ``epsilon`` argument of both layer-norm ops.
            elementwise_affine: when true, ``gamma`` and ``beta`` are passed
                as op inputs; otherwise only ``X`` is.

        Raises:
            AssertionError: if the transformed net does not contain exactly
                one ``Onnxifi`` op, or if the two quantized outputs differ.
        """
        np.random.seed(seed)

        # Reset the workspace
        workspace.ResetWorkspace()
        axis = 1

        dims = np.array(([batch_size, size]))
        X = np.random.uniform(size=dims).astype(np.float32) - 0.5
        gamma = np.random.randn(*X.shape[axis:]).astype(np.float32)
        beta = np.random.randn(*X.shape[axis:]).astype(np.float32)

        # Quantization parameters come from the class helpers applied to X;
        # gamma and beta are not passed to them.
        Y = self._layernorm_transform(X)
        scale, zp = self._get_scale_zp(Y)

        ln_inputs = ["X", "gamma", "beta"] if elementwise_affine else ["X"]

        pred_net = caffe2_pb2.NetDef()
        pred_net.name = "pred"
        pred_net.external_input.extend(["X", "gamma", "beta"])
        pred_net.external_output.extend(["Y_q"])
        pred_net.op.add().CopyFrom(
            core.CreateOperator(
                "LayerNorm",
                ln_inputs,
                ["Y", "mean", "rstd"],
                axis=axis,
                epsilon=epsilon,
                elementwise_affine=elementwise_affine))
        pred_net.op.add().CopyFrom(
            core.CreateOperator("Int8Quantize", ["Y"], ["Y_q"],
                                Y_scale=scale,
                                Y_zero_point=zp))

        print(pred_net)
        pred_net_ref = caffe2_pb2.NetDef()
        pred_net_ref.name = "pred_ref"
        pred_net_ref.external_input.extend(["X", "gamma", "beta"])
        pred_net_ref.external_output.extend(["Y_q"])
        pred_net_ref.op.add().CopyFrom(
            core.CreateOperator(
                "LayerNormInt8QuantizeFakeNNPI",
                ln_inputs,
                ["Y_q", "mean", "rstd"],
                axis=axis,
                epsilon=epsilon,
                elementwise_affine=elementwise_affine,
                Y_scale=scale,
                Y_zero_point=zp))

        shape_hints = {"X": X.shape, "gamma": gamma.shape, "beta": beta.shape}
        pred_net_onnxified = onnxifi_caffe2_net(pred_net,
                                                shape_hints,
                                                debug=True,
                                                adjust_batch=True,
                                                use_onnx=False)
        num_onnxified_ops = sum(1 if o.type == "Onnxifi" else 0
                                for o in pred_net_onnxified.op)
        np.testing.assert_equal(num_onnxified_ops, 1)

        workspace.FeedBlob("X", X)
        workspace.FeedBlob("gamma", gamma)
        workspace.FeedBlob("beta", beta)

        workspace.CreateNet(pred_net_ref)
        workspace.CreateNet(pred_net_onnxified)

        # Both nets write "Y_q", so each result is fetched right after its run.
        workspace.RunNet(pred_net_ref.name)
        Y_c2 = workspace.FetchInt8Blob("Y_q")

        workspace.RunNet(pred_net_onnxified.name)
        Y_glow = workspace.FetchInt8Blob("Y_q")

        if not np.allclose(Y_glow.data, Y_c2.data) or \
           Y_glow.scale != Y_c2.scale or Y_glow.zero_point != Y_c2.zero_point:
            diff_Y = np.abs(
                Y_glow.data.astype(np.float32) - Y_c2.data.astype(np.float32))
            print_test_debug_info(
                "layernorm", {
                    "seed": seed,
                    "size": size,
                    "batch_size": batch_size,
                    "epsilon": epsilon,
                    "gamma": gamma,
                    "beta": beta,
                    "elementwise_affine": elementwise_affine,
                    "X": X,
                    "Y_glow": Y_glow,
                    "Y_c2": Y_c2,
                    "diff_Y": diff_Y,
                })
            # Raised explicitly: a bare ``assert`` is removed under
            # ``python -O`` and would report no message.
            raise AssertionError(
                "layernorm: quantized output of the onnxified net differs "
                "from LayerNormInt8QuantizeFakeNNPI"
            )
    def test_deq_swish_quant(self):
        """Compare a quantize -> FC -> dequantize -> swish -> quantize chain
        run with NNPI fake ops against the same chain after onnxification.

        The input is 256 evenly spaced values in [-20.5, 8] shaped (1, 256);
        the FC layer uses an identity weight matrix and a zero bias.  After
        the reference run the op types of the same proto are rewritten to
        ``Int8Quantize``, ``Int8FC``, ``Int8Dequantize``, ``Swish`` and
        ``Int8Quantize``, the net is passed through ``onnxifi_caffe2_net`` and
        run again.  The two int8 outputs may differ by at most 1 per element.

        Raises:
            AssertionError: if the transformed net does not contain exactly
                one ``Onnxifi`` op, or if any element differs by more than 1.
        """
        workspace.ResetWorkspace()
        n = 256

        X_fp32 = np.linspace(-20.5, 8., num=n).astype(np.float32).reshape(1, n)
        Y_fp32 = self._swish(X_fp32)
        X_scale, X_zero_point = self._get_scale_zp(X_fp32)
        Y_scale, Y_zero_point = self._get_scale_zp(Y_fp32)
        W_fp32 = np.identity(n, dtype=np.float32)
        b_fp32 = np.zeros((n,), dtype=np.float32)

        workspace.FeedBlob("X", X_fp32)
        workspace.FeedBlob("W", W_fp32)
        workspace.FeedBlob("b", b_fp32)

        workspace.RunOperatorOnce(
            core.CreateOperator(
                "Int8FCPackWeight",
                ["W"],
                ["W_int8"],
                engine="DNNLOWP",
                save_unpacked_weights=True,
                in_scale=X_scale,
            )
        )

        ref_net = core.Net("net")
        ref_net.Int8QuantizeNNPI(
            ["X"],
            ["X_int8"],
            Y_scale=X_scale,
            Y_zero_point=X_zero_point
        )
        # The FC output reuses the input's quantization parameters.
        ref_net.Int8FCFakeAcc32NNPI(
            ["X_int8", "W_int8", "b"],
            ["U_int8"],
            Y_scale=X_scale,
            Y_zero_point=X_zero_point,
        )
        ref_net.Int8DequantizeNNPI(
            ["U_int8"],
            ["U_fp16"]
        )
        ref_net.SwishFakeFp16NNPI(
            ["U_fp16"],
            ["Y_fp16"]
        )
        ref_net.Int8QuantizeNNPI(
            ["Y_fp16"],
            ["Y"],
            Y_scale=Y_scale,
            Y_zero_point=Y_zero_point
        )
        ref_net.Proto().external_output.append("Y")

        # run ref_net
        workspace.RunNetOnce(ref_net)
        Y_fbgemm = workspace.FetchInt8Blob("Y")

        # run onnxifi net: same proto, op types rewritten position by position
        onnxifi_op_types = (
            "Int8Quantize",
            "Int8FC",
            "Int8Dequantize",
            "Swish",
            "Int8Quantize",
        )
        for position, op_type in enumerate(onnxifi_op_types):
            ref_net.Proto().op[position].type = op_type
        net_onnxified = onnxifi_caffe2_net(
            ref_net.Proto(),
            {},
            debug=True,
            adjust_batch=False,
            use_onnx=False,
            weight_names=["W_int8", "b"],
        )
        num_onnxified_ops = sum(
            1 if o.type == "Onnxifi" else 0 for o in net_onnxified.op
        )
        np.testing.assert_equal(num_onnxified_ops, 1)
        # TODO: add an assertion to check the optimized net
        # fused Dequantize->Swish->Quantize to QuantizedSwish
        workspace.CreateNet(net_onnxified)
        workspace.RunNet(net_onnxified.name)
        Y_glow = workspace.FetchInt8Blob("Y")

        # Intermediate blobs, fetched only for the debug dump below.
        Swish_Ips = workspace.FetchBlob("U_fp16")
        Swish_Ops = workspace.FetchBlob("Y_fp16")

        # Widen to int32 before subtracting so the difference cannot wrap.
        diff_Y = np.abs(Y_glow.data.astype(np.int32) -
                        Y_fbgemm.data.astype(np.int32))
        num_mismatches = np.count_nonzero(diff_Y)
        max_diff = np.max(diff_Y)
        # TODO: Debug the mismatch and make the test pass with max_diff == 0
        if max_diff > 1:
            print_test_debug_info(
                "QuantizedSwish",
                {
                    "X": X_fp32,
                    "Swish_Ips": Swish_Ips,
                    "Swish_Ops": Swish_Ops,
                    "Y_fbgemm": Y_fbgemm,
                    "Y_glow": Y_glow,
                    "diff": diff_Y,
                    "max_diff": max_diff,
                    "num_mismatches": num_mismatches,
                },
            )
            # Raised explicitly: a bare ``assert`` is removed under
            # ``python -O`` and would report no message.
            raise AssertionError(
                "QuantizedSwish: max abs difference {} exceeds 1 "
                "({} mismatching elements)".format(max_diff, num_mismatches)
            )