示例#1
0
    def relu_test(self, inputs, gc, dc):
        X = inputs[0]
        # First dimension is the batch size
        print(X.shape)
        pred_net = caffe2_pb2.NetDef()
        pred_net.name = "pred"
        pred_net.external_input.extend(["X"])
        pred_net.external_output.append("Y")
        pred_net.op.add().CopyFrom(core.CreateOperator("Relu", ["X"], ["Y"]))
        pred_net_ref = caffe2_pb2.NetDef()
        pred_net_ref.name = "ref"
        pred_net_ref.external_input.extend(["X"])
        pred_net_ref.external_output.append("Y_ref")
        pred_net_ref.op.add().CopyFrom(
            core.CreateOperator(
                "ReluFakeFp16",
                ["X"],
                ["Y_ref"],
            ))

        shape_hints = {"X": X.shape}
        pred_net_onnxified = onnxifi_caffe2_net(pred_net,
                                                shape_hints,
                                                debug=True,
                                                adjust_batch=True,
                                                use_onnx=False)
        print(pred_net_onnxified)
        num_onnxified_ops = sum(1 if o.type == "Onnxifi" else 0
                                for o in pred_net_onnxified.op)
        np.testing.assert_equal(num_onnxified_ops, 1)
        workspace.SwitchWorkspace("glow_test_ws", True)
        workspace.FeedBlob("X", X)

        workspace.CreateNet(pred_net_ref)
        workspace.CreateNet(pred_net_onnxified)
        workspace.FeedBlob("X", X)
        # Run caffe2 net
        workspace.RunNet(pred_net_ref.name)
        Y_c2 = workspace.FetchBlob("Y_ref")

        # Run Glow net
        workspace.RunNet(pred_net_onnxified.name)
        Y_glow = workspace.FetchBlob("Y")

        # Results should be identical since we are comparing with the C2 emulation
        if not np.allclose(Y_c2, Y_glow):
            diff = np.abs((Y_glow - Y_c2) / (Y_c2 + kEpsilon))
            print_test_debug_info("Relu", {
                "X": X,
                "Y_glow": Y_glow,
                "Y_c2": Y_c2,
                "diff": diff
            })
            assert (0)
示例#2
0
    def _test_unary_op(self, opname):
        workspace.ResetWorkspace()
        n = 1
        m = 10000
        X = np.linspace(-20, 20, num=m, dtype=np.float32)
        pred_net = caffe2_pb2.NetDef()
        pred_net.name = "pred"
        pred_net.external_input.append("X")
        pred_net.external_output.append("Y")
        pred_net.op.add().CopyFrom(core.CreateOperator(opname, ['X'], ['Y']))
        ref_net = caffe2_pb2.NetDef()
        ref_net.name = "ref"
        ref_net.external_input.append("X")
        ref_net.external_output.append("Y")
        ref_net.op.add().CopyFrom(
            core.CreateOperator(opname + 'FakeFp16NNPI', ['X'], ['Y']))

        shape_hints = {"X": (n, m)}
        pred_net_onnxified = onnxifi_caffe2_net(pred_net,
                                                shape_hints,
                                                debug=True,
                                                adjust_batch=False,
                                                use_onnx=False)
        print(pred_net_onnxified)
        num_onnxified_ops = sum(1 if o.type == "Onnxifi" else 0
                                for o in pred_net_onnxified.op)
        np.testing.assert_equal(num_onnxified_ops, 1)
        workspace.SwitchWorkspace("glow_test_ws", True)
        workspace.FeedBlob("X", X)
        workspace.CreateNet(ref_net)
        workspace.CreateNet(pred_net_onnxified)
        # Run Glow net
        workspace.RunNet(pred_net_onnxified.name)
        Y_glow = workspace.FetchBlob('Y')
        # Run caffe2 reference net
        workspace.RunNet(ref_net.name)
        Y_c2 = workspace.FetchBlob('Y')

        if not np.allclose(Y_c2, Y_glow):
            diff = np.abs(Y_c2 - Y_glow)
            np.save('/tmp/' + opname + 'diff', diff)
            print_test_debug_info(
                opname, {
                    "X": X,
                    "Y_c2": Y_c2,
                    "Y_glow": Y_glow,
                    "diff": diff,
                    "maxdiff": np.max(diff)
                })
            assert (0)
示例#3
0
    def test_slws_fused_8bit_rowwise_all_same(self):
        # Comment out for predictable debugging
        np.random.seed(int(time.time()))
        workspace.ResetWorkspace()
        n = 1
        m = 2
        data = np.ones((n, m)).astype(np.float32) * 0.2 - 0.1

        max_segments = 5
        max_segment_length = 200
        num_lengths = np.random.randint(1, max_segments + 1)
        # number of segments to run
        lengths = np.random.randint(0, max_segment_length + 1, size=num_lengths).astype(
            np.int32
        )
        num_indices = np.sum(lengths)
        indices = np.zeros(num_indices, dtype=np.int64)
        weights = np.random.uniform(low=-0.5, high=0.5, size=[len(indices)]).astype(
            np.float32
        )

        pred_net = caffe2_pb2.NetDef()
        pred_net.name = "pred"
        pred_net.external_input.extend(
            ["quantized_data", "weights", "indices", "lengths"]
        )
        pred_net.external_output.append("Y")
        pred_net.op.add().CopyFrom(
            core.CreateOperator(
                "SparseLengthsWeightedSumFused8BitRowwise",
                ["quantized_data", "weights", "indices", "lengths"],
                ["Y"],
            )
        )

        ref_net = caffe2_pb2.NetDef()
        ref_net.name = "ref"
        ref_net.external_input.extend(
            ["quantized_data", "weights", "indices", "lengths"]
        )
        ref_net.external_output.append("Y")
        ref_net.op.add().CopyFrom(
            core.CreateOperator(
                "SparseLengthsWeightedSumFused8BitRowwiseFakeFP16NNPI",
                ["quantized_data", "weights", "indices", "lengths"],
                ["Y"],
            )
        )

        workspace.FeedBlob("data", data)
        workspace.RunOperatorOnce(
            core.CreateOperator(
                "FloatToFused8BitRowwiseQuantized", ["data"], ["quantized_data"]
            )
        )
        pred_net_onnxified = onnxifi_caffe2_net(
            pred_net,
            {},
            max_batch_size=max_segments,
            max_seq_size=max_segments * max_segment_length,
            debug=True,
            adjust_batch=True,
            use_onnx=False,
        )

        num_onnxified_ops = sum(
            1 if o.type == "Onnxifi" else 0 for o in pred_net_onnxified.op
        )
        np.testing.assert_equal(num_onnxified_ops, 1)

        workspace.FeedBlob("indices", indices)
        workspace.FeedBlob("lengths", lengths)
        workspace.FeedBlob("weights", weights)

        workspace.CreateNet(pred_net_onnxified)
        workspace.CreateNet(ref_net)

        workspace.RunNet(pred_net_onnxified.name)
        Y_glow = workspace.FetchBlob("Y")

        workspace.RunNet(ref_net.name)
        Y_c2 = workspace.FetchBlob("Y")

        if not np.allclose(Y_c2, Y_glow):
            print_test_debug_info(
                "slws_fused_8bit_rowwise",
                {
                    "indices": indices,
                    "data": data,
                    "lengths": lengths,
                    "weights": weights,
                    "Y_c2": Y_c2,
                    "Y_glow": Y_glow,
                    "diff": Y_glow - Y_c2,
                    "rowwise_diff": (Y_glow - Y_c2)[:, 0],
                },
            )
            assert 0
示例#4
0
    def test_slws_fused_8bit_rowwise_acc32_nnpi(self):
        workspace.GlobalInit(
            [
                "caffe2",
                "--glow_global_fp16=0",
                "--glow_global_fused_scale_offset_fp16=0",
                "--glow_global_force_sls_fp16_accum=0",
            ]
        )
        # Comment out for predictable debugging
        seed = int(time.time() * 1000) % 2 ** 16
        print(seed)
        np.random.seed(seed)
        workspace.ResetWorkspace()

        n = 20000
        DIM = 6
        data = (4 * np.random.random_sample((n, DIM)) + 1).astype(np.float32)

        max_segments = 200
        max_segment_length = 200
        num_lengths = np.random.randint(0, max_segments + 1)
        # number of segments to run
        lengths = np.random.randint(2, max_segment_length + 1, size=num_lengths).astype(
            np.int32
        )
        num_indices = np.sum(lengths)
        indices = np.random.randint(low=0, high=n, size=num_indices, dtype=np.int64)
        weights = np.random.uniform(low=0.01, high=0.5, size=[len(indices)]).astype(
            np.float32
        )

        pred_net = caffe2_pb2.NetDef()
        pred_net.name = "pred"
        pred_net.external_input.extend(
            ["quantized_data", "weights", "indices", "lengths"]
        )
        pred_net.external_output.append("Y")
        pred_net.op.add().CopyFrom(
            core.CreateOperator(
                "SparseLengthsWeightedSumFused8BitRowwise",
                ["quantized_data", "weights", "indices", "lengths"],
                ["Y"],
            )
        )

        ref_net = caffe2_pb2.NetDef()
        ref_net.name = "ref"
        ref_net.external_input.extend(
            ["quantized_data", "weights", "indices", "lengths"]
        )
        ref_net.external_output.append("Y")
        ref_net.op.add().CopyFrom(
            core.CreateOperator(
                "SparseLengthsWeightedSumFused8BitRowwiseFakeFP32NNPI",
                ["quantized_data", "weights", "indices", "lengths"],
                ["Y"],
            )
        )

        workspace.FeedBlob("data", data)
        workspace.RunOperatorOnce(
            core.CreateOperator(
                "FloatToFused8BitRowwiseQuantized", ["data"], ["quantized_data"]
            )
        )
        onnxified_net = onnxifi_caffe2_net(
            pred_net,
            {},
            max_batch_size=max_segments,
            max_seq_size=max_segments * max_segment_length,
            debug=True,
            adjust_batch=True,
            use_onnx=False,
        )
        workspace.FeedBlob("indices", indices)
        workspace.FeedBlob("lengths", lengths)
        workspace.FeedBlob("weights", weights)

        workspace.CreateNet(onnxified_net)
        workspace.CreateNet(ref_net)

        workspace.RunNet(onnxified_net.name)
        Y_glow = workspace.FetchBlob("Y")

        workspace.RunNet(ref_net.name)
        Y_ref = workspace.FetchBlob("Y")

        diff = np.abs((Y_ref - Y_glow) / (Y_ref + 1e-8))
        max_err = np.max(diff, axis=1)
        num_offenders = (max_err > 0).sum()
        if num_offenders > 0:
            print_test_debug_info(
                "test_slws_fused_8bit_rowwise_acc32_nnpi",
                {
                    "indices": indices,
                    "data": data.shape,
                    "lengths": lengths,
                    "weights": weights,
                    "Y_glow": Y_glow,
                    "Y_ref": Y_ref,
                    "diff": diff,
                    "rowwise_diff": np.max(diff, axis=1),
                },
            )
            assert 0
示例#5
0
    def test_small_sls_acc32(self):

        workspace.GlobalInit(
            [
                "caffe2",
                "--glow_global_fp16=0",
                "--glow_global_fused_scale_offset_fp16=0",
                "--glow_global_force_sls_fp16_accum=0",
            ]
        )
        seed = int(time.time() * 1000) % 2 ** 16
        print(seed)
        np.random.seed(seed)
        workspace.ResetWorkspace()

        n = 2
        DIM = 3
        data = 4 * (np.random.random_sample((n, DIM)) + 1).astype(np.float32)

        lengths = np.array([n], dtype=np.int32)
        indices = np.array(range(n), dtype=np.int64)
        weights = np.random.uniform(low=0.01, high=0.5, size=[n]).astype(np.float32)

        pred_net = caffe2_pb2.NetDef()
        pred_net.name = "pred"
        pred_net.external_input.extend(
            ["quantized_data", "weights", "indices", "lengths"]
        )
        pred_net.external_output.append("Y")
        pred_net.op.add().CopyFrom(
            core.CreateOperator(
                "SparseLengthsWeightedSumFused8BitRowwise",
                ["quantized_data", "weights", "indices", "lengths"],
                ["Y"],
            )
        )

        ref_net = caffe2_pb2.NetDef()
        ref_net.name = "ref"
        ref_net.external_input.extend(
            ["quantized_data", "weights", "indices", "lengths"]
        )
        ref_net.external_output.append("Y")
        ref_net.op.add().CopyFrom(
            core.CreateOperator(
                "SparseLengthsWeightedSumFused8BitRowwiseFakeFP32NNPI",
                ["quantized_data", "weights", "indices", "lengths"],
                ["Y"],
            )
        )

        workspace.FeedBlob("data", data)
        workspace.RunOperatorOnce(
            core.CreateOperator(
                "FloatToFused8BitRowwiseQuantized", ["data"], ["quantized_data"]
            )
        )

        quantized_data = workspace.FetchBlob("quantized_data")

        onnxified_net = onnxifi_caffe2_net(
            pred_net,
            {},
            max_batch_size=1,
            max_seq_size=n,
            debug=True,
            adjust_batch=True,
            use_onnx=False,
        )
        workspace.FeedBlob("indices", indices)
        workspace.FeedBlob("lengths", lengths)
        workspace.FeedBlob("weights", weights)

        workspace.CreateNet(onnxified_net)
        workspace.CreateNet(ref_net)

        workspace.RunNet(onnxified_net.name)
        Y_glow = workspace.FetchBlob("Y")

        workspace.RunNet(ref_net.name)
        Y_ref = workspace.FetchBlob("Y")

        diff = np.abs((Y_ref - Y_glow) / (Y_ref + 1e-8))
        max_err = np.max(diff, axis=1)
        num_offenders = (max_err > 0).sum()
        if num_offenders > 0:
            np.set_printoptions(precision=12)
            print(
                "ref",
                Y_ref.astype(np.float16).astype(np.float32),
                "glow",
                Y_glow.astype(np.float16).astype(np.float32),
            )
            print_test_debug_info(
                "test_small_sls_acc32",
                {
                    "seed": seed,
                    "indices": indices,
                    "data": data,
                    "quantized_data": quantized_data,
                    "lengths": lengths,
                    "weights": weights,
                    "Y_glow": Y_glow,
                    "Y_ref": Y_ref,
                    "diff": diff,
                    "rowwise_diff": np.max(diff, axis=1),
                },
            )
            assert 0
示例#6
0
    def Test_SLS_NonQuantized_fp16(self):
        N = 20000
        DIM = 64
        D = (4 * np.random.random_sample((N, DIM)) + 1).astype(np.float32)
        I = (np.random.randint(0, N, size=12)).astype(np.int64)
        L = np.asarray([4, 4, 4]).astype(np.int32)
        workspace.FeedBlob("D", D)

        ref_c2_net = core.Net("test_ref_c2")
        ref_c2_net.SparseLengthsSum(["D", "I", "L"], "ref_out")
        ref_c2_net.Proto().external_input.extend(["D", "I", "L"])
        ref_c2_net.Proto().external_output.extend(["ref_out"])

        fp16_c2_net = core.Net("test_fp16_c2")
        fp16_c2_net.SparseLengthsSumFakeFP16AccFP16(["D", "I", "L"], "fp16_out")

        input_dict = {}

        pred_net = caffe2_pb2.NetDef()
        pred_net.name = "pred"
        pred_net.external_input.extend(["D", "I", "L"])
        pred_net.external_output.append("glow_out")
        pred_net.op.add().CopyFrom(
            core.CreateOperator("SparseLengthsSum", ["D", "I", "L"], ["glow_out"])
        )

        onnxified_net = onnxifi_caffe2_net(
            pred_net,
            input_dict,
            max_batch_size=3,
            max_seq_size=16,
            debug=True,
            adjust_batch=False,
            use_onnx=False,
        )

        num_onnxified_ops = sum(
            1 if op.type == "Onnxifi" else 0 for op in onnxified_net.op
        )
        print(onnxified_net)
        np.testing.assert_equal(num_onnxified_ops, 1)

        workspace.FeedBlob("I", I)
        workspace.FeedBlob("L", L)

        workspace.RunNetOnce(ref_c2_net)
        ref_c2_out = workspace.FetchBlob("ref_out")

        workspace.RunNetOnce(fp16_c2_net)
        fp16_c2_out = workspace.FetchBlob("fp16_out")

        np.testing.assert_allclose(fp16_c2_out, ref_c2_out, atol=1e-3, rtol=1e-3)

        workspace.RunNetOnce(onnxified_net)
        fp16_glow_out = workspace.FetchBlob("glow_out")

        if not np.allclose(fp16_glow_out, fp16_c2_out):
            diff = np.abs(fp16_glow_out - fp16_c2_out)
            print_test_debug_info(
                "sls",
                {
                    "indices": I,
                    "data": D,
                    "lengths": L,
                    "Y_c2": fp16_c2_out,
                    "Y_glow": fp16_glow_out,
                    "diff": diff,
                    "rowwise_diff": diff[:, 0],
                },
            )
            assert 0
示例#7
0
    def test_bn(self):
        size = 30
        input_channels = 20
        batch_size = 40
        seed = int(time.time())
        np.random.seed(seed)

        order = "NCHW"
        epsilon = 1e-3

        pred_net = caffe2_pb2.NetDef()
        pred_net.name = "pred"
        pred_net.external_input.extend(["X", "scale", "bias", "mean", "var"])
        pred_net.external_output.append("Y")
        pred_net.op.add().CopyFrom(
            core.CreateOperator("SpatialBN",
                                ["X", "scale", "bias", "mean", "var"], ["Y"],
                                order=order,
                                is_test=True,
                                epsilon=epsilon))

        if GLOW_LOWERED_BATCHNORM:
            refopname = "SpatialBNFakeLoweredFp16NNPI"
        else:
            refopname = "SpatialBNFakeFp16NNPI"

        pred_net_ref = caffe2_pb2.NetDef()
        pred_net_ref.name = "pred"
        pred_net_ref.external_input.extend(
            ["X", "scale", "bias", "mean", "var"])
        pred_net_ref.external_output.append("X")
        pred_net_ref.op.add().CopyFrom(
            core.CreateOperator(refopname,
                                ["X", "scale", "bias", "mean", "var"], ["Y"],
                                order=order,
                                is_test=True,
                                epsilon=epsilon))

        scale = np.random.rand(input_channels).astype(np.float32) + 0.5
        bias = np.random.rand(input_channels).astype(np.float32) - 0.5
        mean = np.random.randn(input_channels).astype(np.float32)
        var = np.random.rand(input_channels).astype(np.float32) + 0.5
        X = np.random.rand(batch_size, input_channels, size, size).astype(
            np.float32) - 0.5

        workspace.FeedBlob("scale", scale)
        workspace.FeedBlob("bias", bias)
        workspace.FeedBlob("mean", mean)
        workspace.FeedBlob("var", var)

        # Use for reference to debug
        # Y_np = reference_spatialbn_test16(X, scale, bias, mean, var, epsilon, order)

        pred_net_onnxified = onnxifi_caffe2_net(pred_net, {
            "X": [batch_size, input_channels, size, size],
            "scale": [input_channels],
            "bias": [input_channels],
            "mean": [input_channels],
            "var": [input_channels]
        },
                                                debug=True,
                                                adjust_batch=False,
                                                use_onnx=False)
        num_onnxified_ops = sum(1 if o.type == "Onnxifi" else 0
                                for o in pred_net_onnxified.op)
        np.testing.assert_equal(num_onnxified_ops, 1)

        workspace.FeedBlob("X", X)

        workspace.CreateNet(pred_net_onnxified)
        workspace.CreateNet(pred_net_ref)

        workspace.RunNet(pred_net_ref.name)
        Y_c2 = workspace.FetchBlob("Y")

        workspace.RunNet(pred_net_onnxified.name)
        Y_glow = workspace.FetchBlob("Y")

        if not np.allclose(Y_glow.astype(np.float16), Y_c2.astype(np.float16)):
            diff = np.abs(Y_glow - Y_c2).astype(np.float16)
            print_test_debug_info(
                "bn", {
                    "seed": seed,
                    "scale": scale,
                    "bias": bias,
                    "mean": mean,
                    "var": var,
                    "Y_np": Y_c2.shape,
                    "Y_glow": Y_glow.shape,
                    "diff": diff,
                    "rowwise_diff": np.max(np.abs(diff), -1)
                })
            assert (0)
    def test_batch_matmul(self, M, K, N, trans_a, trans_b, run_ints, gc, dc):
        workspace.ResetWorkspace()
        C = 0  # TODO
        batch_dims = np.random.randint(low=1, high=3, size=C,
                                       dtype=np.int64).tolist()

        if run_ints:
            X = np.random.randint(low=1, high=3,
                                  size=((1, M, K))).astype(np.float32)
        else:
            X = 100 * (np.random.rand(*(batch_dims + [M, K])).astype(
                np.float32) - 0.5)
        if trans_a:
            X = X.swapaxes(-1, -2)

        if run_ints:
            Y = np.random.randint(low=1, high=3,
                                  size=((1, K, N))).astype(np.float32)
        else:
            Y = 100 * (np.random.rand(*(batch_dims + [K, N])).astype(
                np.float32) - 0.5)
        if trans_b:
            Y = Y.swapaxes(-1, -2)

        pred_net = caffe2_pb2.NetDef()
        pred_net.name = "pred"
        pred_net.external_input.extend(["X", "Y"])
        pred_net.external_output.append("out")
        pred_net.op.add().CopyFrom(
            core.CreateOperator('BatchMatMul', ['X', 'Y'],
                                'out',
                                trans_a=trans_a,
                                trans_b=trans_b))

        pred_net_ref = core.Net("pred_net_ref")
        pred_net_ref.BatchMatMulFP16Acc16Fake(["X", "Y"], ['out'],
                                              trans_a=trans_a,
                                              trans_b=trans_b)

        print("dims", batch_dims, X.shape, Y.shape)
        pred_net_onnxified = onnxifi_caffe2_net(pred_net, {
            "X": X.shape,
            "Y": Y.shape
        },
                                                debug=True,
                                                adjust_batch=False,
                                                use_onnx=False)
        num_onnxified_ops = sum(1 if o.type == "Onnxifi" else 0
                                for o in pred_net_onnxified.op)
        np.testing.assert_equal(num_onnxified_ops, 1)

        workspace.FeedBlob("X", X)
        workspace.FeedBlob("Y", Y)
        workspace.CreateNet(pred_net_onnxified)
        workspace.CreateNet(pred_net_ref)

        # Run Glow net
        workspace.RunNet(pred_net_onnxified.name)
        out_glow = workspace.FetchBlob('out')

        # Run caffe2 net
        workspace.RunNet(pred_net_ref)
        out_c2_fakefp16 = workspace.FetchBlob('out')

        diff = np.abs((out_c2_fakefp16 - out_glow) / (out_c2_fakefp16 + 1e-8))
        rowdiff = np.max(diff, axis=1)

        success = True
        if run_ints:
            if not np.allclose(out_glow, out_c2_fakefp16):
                success = False
        else:
            n_offenders = np.count_nonzero(rowdiff[rowdiff > GLOW_MATMUL_RTOL])
            # Find the max difference per row, if more than 10% of the rows
            # are bigger, consider it a failure.
            if n_offenders * 10 > rowdiff.shape[0]:
                success = False

        if not success:
            print_test_debug_info(
                "bmm", {
                    "m": M,
                    "k": K,
                    "n": N,
                    "X": X,
                    "Y": Y,
                    "out_glow": out_glow,
                    "out_c2_fakefp16": out_c2_fakefp16,
                    "diff": diff
                })
            assert (0)
示例#9
0
    def _test_binary_op_graph(self, name):
        # First dimension is the batch size
        dims = np.concatenate((np.array([1]), np.random.randint(1, 20,
                                                                size=3)))
        A = np.random.uniform(low=-100.0, high=100.0,
                              size=dims).astype(np.float32)
        B = np.random.uniform(low=-100.0, high=100.0,
                              size=dims).astype(np.float32)
        print(A.shape, B.shape)
        pred_net = caffe2_pb2.NetDef()
        pred_net.name = "pred"
        pred_net.external_input.extend(["A", "B"])
        pred_net.external_output.append("C")
        pred_net.op.add().CopyFrom(core.CreateOperator(name, ["A", "B"],
                                                       ["C"]))
        pred_net_ref = caffe2_pb2.NetDef()
        pred_net_ref.name = "ref"
        pred_net_ref.external_input.extend(["A", "B"])
        pred_net_ref.external_output.append("C_ref")
        pred_net_ref.op.add().CopyFrom(
            core.CreateOperator(
                name + "FakeFp16",
                ["A", "B"],
                ["C_ref"],
            ))

        shape_hints = {"A": A.shape, "B": B.shape}
        pred_net_onnxified = onnxifi_caffe2_net(pred_net,
                                                shape_hints,
                                                debug=True,
                                                adjust_batch=True,
                                                use_onnx=False)
        print(pred_net_onnxified)
        num_onnxified_ops = sum(1 if o.type == "Onnxifi" else 0
                                for o in pred_net_onnxified.op)
        np.testing.assert_equal(num_onnxified_ops, 1)
        workspace.SwitchWorkspace("glow_test_ws", True)
        workspace.FeedBlob("A", A)
        workspace.FeedBlob("B", B)

        workspace.CreateNet(pred_net_ref)
        workspace.CreateNet(pred_net_onnxified)
        num_iterations = 10
        for _ in range(num_iterations):
            A = np.random.uniform(low=-100.0, high=100.0,
                                  size=dims).astype(np.float32)
            B = np.random.uniform(low=-100.0, high=100.0,
                                  size=dims).astype(np.float32)
            workspace.FeedBlob("A", A)
            workspace.FeedBlob("B", B)
            # Run caffe2 net
            workspace.RunNet(pred_net_ref.name)
            Y_c2 = workspace.FetchBlob("C_ref")

            # Run Glow net
            workspace.RunNet(pred_net_onnxified.name)
            Y_glow = workspace.FetchBlob("C")

            # Results should be identical since we are comparing with the C2 emulation
            if not np.allclose(Y_c2, Y_glow):
                diff = np.abs((Y_glow - Y_c2) / (Y_c2 + kEpsilon))
                print_test_debug_info(
                    name, {
                        "dims": dims,
                        "A": A,
                        "B": B,
                        "Y_glow": Y_glow,
                        "Y_c2": Y_c2,
                        "diff": diff
                    })
                assert (0)
示例#10
0
    def test_fc_exercise(self):
        """ Test that the matmul engine is working, this doesn't test
            precision
        """
        m = np.random.randint(low=4, high=50)
        k = np.random.randint(low=4, high=50)
        n = np.random.randint(low=4, high=50)
        dtype = np.float32
        pred_net = caffe2_pb2.NetDef()
        pred_net.name = "pred"
        pred_net.external_input.extend(["X", "W0", "b0"])
        pred_net.external_output.append("Y")
        pred_net.op.add().CopyFrom(
            core.CreateOperator(
                "FC",
                ["X", "W0", "b0"],
                ["Y"],
            ))

        workspace.SwitchWorkspace("glow_test_ws", True)
        workspace.ResetWorkspace()
        W0 = np.random.randint(low=1, high=3, size=(n, k)).astype(dtype)
        b0 = np.random.randint(low=1, high=3, size=(n)).astype(dtype)
        workspace.FeedBlob("W0", W0)
        workspace.FeedBlob("b0", b0)

        pred_net_onnxified = onnxifi_caffe2_net(pred_net, {"X": (m, k)},
                                                debug=True,
                                                adjust_batch=False,
                                                use_onnx=False)
        num_onnxified_ops = sum(1 if o.type == "Onnxifi" else 0
                                for o in pred_net_onnxified.op)
        np.testing.assert_equal(num_onnxified_ops, 1)

        X0 = np.random.randint(low=1, high=3, size=(m, k)).astype(dtype)
        workspace.FeedBlob("X", X0)
        workspace.CreateNet(pred_net_onnxified)
        workspace.CreateNet(pred_net)

        num_iterations = 2
        for _ in range(num_iterations):
            X0 = np.random.randint(low=1, high=3, size=(m, k)).astype(dtype)
            workspace.FeedBlob("X", X0)
            # Run Glow net
            workspace.RunNet(pred_net_onnxified.name)
            Y_glow = workspace.FetchBlob('Y')
            # Run caffe2 net
            workspace.RunNet(pred_net.name)
            Y_c2 = workspace.FetchBlob('Y')
            if not np.allclose(Y_c2, Y_glow):
                print_test_debug_info(
                    "fc", {
                        "m": m,
                        "k": k,
                        "n": n,
                        "X": X0,
                        "W0": W0,
                        "b0": b0,
                        "Y_glow": Y_glow,
                        "Y_c2": Y_c2,
                        "diff": np.abs((Y_c2 - Y_glow) / Y_c2)
                    })
                assert (0)
示例#11
0
    def test_fc_num0(self):
        """ Test numerics, fix a dimension and determine the ranges of error.
            Use Fp16FCAcc16 as a reference.
        """
        np.random.seed(int(time.time()))
        m = np.random.randint(low=4, high=50)
        k = np.random.randint(low=4, high=1000)
        n = np.random.randint(low=4, high=50)
        use_packed = np.random.randint(2)
        W = "W_packed" if use_packed else "W0"
        dtype = np.float32
        pred_net = caffe2_pb2.NetDef()
        pred_net.name = "pred"
        pred_net.external_input.extend(["X", W, "b0"])
        pred_net.external_output.append("Y")
        pred_net.op.add().CopyFrom(
            core.CreateOperator(
                "FbFCPacked" if use_packed else "FC",
                ["X", W, "b0"],
                ["Y"],
            ))
        pred_net_ref = caffe2_pb2.NetDef()
        pred_net_ref.name = "pred"
        pred_net_ref.external_input.extend(["X", W, "b0"])
        pred_net_ref.external_output.append("Y")
        pred_net_ref.op.add().CopyFrom(
            core.CreateOperator(
                "Fp16FCAcc16NNPI",
                ["X", W, "b0"],
                ["Y"],
            ))

        workspace.SwitchWorkspace("glow_test_ws", True)
        workspace.ResetWorkspace()
        W0 = 10 * (np.random.rand(n, k) - 0.5).astype(np.float16).astype(
            np.float32)
        b0 = 1 * (np.random.rand(n) - 0.5).astype(np.float16).astype(
            np.float32)

        workspace.FeedBlob("W0", W0)
        workspace.FeedBlob("b0", b0)
        workspace.RunOperatorOnce(
            core.CreateOperator(
                "FbGemmPack",
                ['W0'],
                ['W_packed'],
                no_packing=True,
            ))

        pred_net_onnxified = onnxifi_caffe2_net(pred_net, {"X": (m, k)},
                                                debug=True,
                                                adjust_batch=False,
                                                use_onnx=False)
        num_onnxified_ops = sum(1 if o.type == "Onnxifi" else 0
                                for o in pred_net_onnxified.op)
        np.testing.assert_equal(num_onnxified_ops, 1)

        X0 = np.random.rand(m, k).astype(dtype) - 0.5
        workspace.FeedBlob("X", X0)
        workspace.CreateNet(pred_net_onnxified)
        workspace.CreateNet(pred_net_ref)

        num_iterations = 10
        for _ in range(num_iterations):
            X0 = 100 * (np.random.rand(m, k) - 0.5).\
                astype(np.float16).astype(np.float32)
            workspace.FeedBlob("X", X0)
            # Run Glow net
            workspace.RunNet(pred_net_onnxified.name)
            Y_glow = workspace.FetchBlob('Y')
            # Run caffe2 net
            workspace.RunNet(pred_net_ref.name)
            Y_c2 = workspace.FetchBlob('Y')

            diff = np.abs((Y_c2 - Y_glow) / (Y_c2 + 1e-8))
            rowdiff = np.max(diff, axis=1)

            n_offenders = np.count_nonzero(rowdiff[rowdiff > GLOW_MATMUL_RTOL])
            if n_offenders > 0:
                print_test_debug_info(
                    "fc", {
                        "iter": _,
                        "m": m,
                        "k": k,
                        "n": n,
                        "X": X0,
                        "W0": W0,
                        "b0": b0,
                        "Y_glow": Y_glow,
                        "Y_c2": Y_c2,
                        "diff": diff,
                        "rowdiff": rowdiff
                    })
                assert (0)
示例#12
0
    def test_fc_numeric_cases(self):
        """ Test numerics, use examples found from the unit test.
            Use Fp16FCAcc16NNPI as a reference.
        """
        m = 1
        k = 20
        n = 1
        dtype = np.float32
        pred_net = caffe2_pb2.NetDef()
        pred_net.name = "pred"
        pred_net.external_input.extend(["X", "W0", "b0"])
        pred_net.external_output.append("Y")
        pred_net.op.add().CopyFrom(
            core.CreateOperator(
                "FC",
                ["X", "W0", "b0"],
                ["Y"],
            ))
        pred_net_ref = caffe2_pb2.NetDef()
        pred_net_ref.name = "pred"
        pred_net_ref.external_input.extend(["X", "W0", "b0"])
        pred_net_ref.external_output.append("Y")
        pred_net_ref.op.add().CopyFrom(
            core.CreateOperator(
                "Fp16FCAcc16NNPI",
                ["X", "W0", "b0"],
                ["Y"],
            ))

        workspace.SwitchWorkspace("glow_test_ws", True)
        workspace.ResetWorkspace()

        W0 = np.array([[
            0.04882812, 0.21520996, 0.1027832, 0.04489136, -0.07635498,
            0.14587402, -0.06240845, 0.3918457, 0.46362305, -0.11657715,
            0.29174805, 0.02890015, 0.0680542, 0.4255371, -0.42895508,
            -0.4128418, -0.47973633, 0.33251953, 0.27807617, 0.3701172
        ]],
                      dtype=np.float32)
        b0 = [0.47851562]
        b0 = np.array(b0, dtype=np.float32)

        workspace.FeedBlob("W0", W0)
        workspace.FeedBlob("b0", b0)

        pred_net_onnxified = onnxifi_caffe2_net(pred_net, {"X": (m, k)},
                                                debug=True,
                                                adjust_batch=False,
                                                use_onnx=False)
        num_onnxified_ops = sum(1 if o.type == "Onnxifi" else 0
                                for o in pred_net_onnxified.op)
        np.testing.assert_equal(num_onnxified_ops, 1)

        X0 = np.random.rand(m, k).astype(dtype) - 0.5
        workspace.FeedBlob("X", X0)
        workspace.CreateNet(pred_net_onnxified)
        workspace.CreateNet(pred_net_ref)

        X_inputs = [
            np.array([[
                -2.94921875e-01, -3.58642578e-01, -1.92871094e-01,
                2.81250000e-01, -1.30126953e-01, 2.32696533e-02,
                -4.55566406e-01, -2.31811523e-01, -1.95190430e-01,
                -7.76977539e-02, -1.29394531e-01, 2.94677734e-01,
                8.96453857e-04, 4.97314453e-01, -6.07604980e-02,
                2.55371094e-01, 3.49853516e-01, -1.37695312e-01,
                2.95410156e-01, -3.67187500e-01
            ]],
                     dtype=np.float32),
            np.array([[
                -0.4494629, -0.22192383, -0.1640625, 0.11480713, -0.09851074,
                -0.02084351, 0.19091797, -0.17468262, -0.47485352, 0.07489014,
                0.03897095, 0.00197601, 0.02835083, -0.27294922, 0.26757812,
                -0.20996094, -0.31103516, -0.41601562, 0.09918213, -0.07696533
            ]],
                     dtype=np.float32),
            np.array([[
                0.01150513, -0.20507812, 0.46704102, 0.00906372, 0.19848633,
                0.3720703, 0.46557617, -0.47436523, -0.35107422, -0.0362854,
                -0.20812988, 0.41918945, 0.09716797, 0.19897461, 0.3876953,
                -0.0165863, 0.23535156, 0.29956055, 0.24389648, -0.23486328
            ]],
                     dtype=np.float32)
        ]

        for i in range(len(X_inputs)):
            workspace.FeedBlob("X", X_inputs[i])
            # Run Glow net
            workspace.RunNet(pred_net_onnxified.name)
            Y_glow = workspace.FetchBlob('Y')
            workspace.RunNet(pred_net_ref.name)
            Y_c2 = workspace.FetchBlob('Y')

            diff = np.abs((Y_c2 - Y_glow) / (Y_c2 + 1e-8))
            rowdiff = np.max(diff, axis=1)

            n_offenders = np.count_nonzero(rowdiff[rowdiff > GLOW_MATMUL_RTOL])
            if n_offenders > 0:
                print_test_debug_info(
                    "fc", {
                        "iter": i,
                        "m": m,
                        "k": k,
                        "n": n,
                        "X": X0,
                        "W0": W0,
                        "b0": b0,
                        "Y_glow": Y_glow,
                        "Y_c2": Y_c2,
                        "diff": diff,
                        "rowdiff": rowdiff
                    })
                assert (0)