示例#1
0
    def test_fast_memonger(self, input_dim, output_dim, batch_size, do):
        m = model_helper.ModelHelper()
        fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
        fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
        fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)

        fc3.Relu([], fc3)\
           .Softmax([], "pred") \
           .LabelCrossEntropy(["label"], ["xent"]) \
           .AveragedLoss([], "loss")
        input_to_grad = m.AddGradientOperators(["loss"])
        m.net.Proto().device_option.CopyFrom(do)
        m.param_init_net.Proto().device_option.CopyFrom(do)
        static_blobs = \
            [o for op in m.param_init_net.Proto().op for o in op.output] + \
            ["data", "label", "loss", input_to_grad["fc1_w"]]

        optimized_net = memonger.optimize_inference_fast(
            m.Proto(), static_blobs)
        data = np.random.randn(batch_size, input_dim).astype(np.float32)
        label = np.random.randint(low=0, high=output_dim,
                                  size=(batch_size, )).astype(np.int32)
        workspace.RunNetOnce(m.param_init_net)
        workspace.FeedBlob("data", data, device_option=do)
        workspace.FeedBlob("label", label, device_option=do)
        workspace.RunNetOnce(m.net)
        loss = workspace.FetchBlob("loss")
        grad = workspace.FetchBlob(str(input_to_grad["fc1_w"]))
        workspace.RunNetOnce(optimized_net)
        optimized_loss = workspace.FetchBlob("loss")
        optimized_grad = workspace.FetchBlob(str(input_to_grad["fc1_w"]))
        np.testing.assert_almost_equal(loss, optimized_loss)
        np.testing.assert_almost_equal(grad, optimized_grad)

        self.assertLess(count_blobs(optimized_net), count_blobs(m.Proto()))
示例#2
0
    def test_fast_memonger_unique_outputs(self):
        m = model_helper.ModelHelper()
        fc = []
        for i in range(2):
            z = brew.fc(m,
                        "data{}".format(i),
                        "fc".format(i),
                        dim_in=2,
                        dim_out=2)
            fc.append(z)
        r = []
        # Trick is here to have same input appear twice in a same Sum
        for x in fc:
            for y in fc:
                r.append(brew.sum(m, [x, y], 1))
        concated = brew.concat(m, r, "concated")
        brew.relu(m, concated, "merged")

        static_blobs = \
            [o for op in m.param_init_net.Proto().op for o in op.output] + \
            ["merged"] + ["data{}".format(i) for i in range(len(fc))]

        optimized_net = memonger.optimize_inference_fast(
            m.Proto(), static_blobs)
        for op in optimized_net.op:
            self.assertEqual(len(op.output), len(set(op.output)), str(op))
示例#3
0
    def test_fast_memonger(self, input_dim, output_dim, batch_size, do):
        m = model_helper.ModelHelper()
        fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
        fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
        fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)

        fc3.Relu([], fc3)\
           .Softmax([], "pred") \
           .LabelCrossEntropy(["label"], ["xent"]) \
           .AveragedLoss([], "loss")
        input_to_grad = m.AddGradientOperators(["loss"])
        m.net.Proto().device_option.CopyFrom(do)
        m.param_init_net.Proto().device_option.CopyFrom(do)
        static_blobs = \
            [o for op in m.param_init_net.Proto().op for o in op.output] + \
            ["data", "label", "loss", input_to_grad["fc1_w"]]

        optimized_net = memonger.optimize_inference_fast(
            m.Proto(), static_blobs)
        data = np.random.randn(batch_size, input_dim).astype(np.float32)
        label = np.random.randint(
            low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
        workspace.RunNetOnce(m.param_init_net)
        workspace.FeedBlob("data", data, device_option=do)
        workspace.FeedBlob("label", label, device_option=do)
        workspace.RunNetOnce(m.net)
        loss = workspace.FetchBlob("loss")
        grad = workspace.FetchBlob(str(input_to_grad["fc1_w"]))
        workspace.RunNetOnce(optimized_net)
        optimized_loss = workspace.FetchBlob("loss")
        optimized_grad = workspace.FetchBlob(str(input_to_grad["fc1_w"]))
        np.testing.assert_almost_equal(loss, optimized_loss)
        np.testing.assert_almost_equal(grad, optimized_grad)

        self.assertLess(count_blobs(optimized_net), count_blobs(m.Proto()))
示例#4
0
    def test_resnet_forward_only_fast_simplenet(self):
        '''
        Test C++ memonger that is only for simple nets
        '''
        model = cnn.CNNModelHelper(
            order="NCHW",
            name="test",
            cudnn_exhaustive_search=True,
        )
        with core.NameScope("gpu_0"):
                data = model.net.AddExternalInput("gpu_0/data")
                resnet.create_resnet50(
                    model,
                    data,
                    num_input_channels=3,
                    num_labels=1000,
                    is_test=True
                )

        count_before = count_blobs(model.net.Proto())
        t = time.time()
        optim_proto = memonger.optimize_inference_fast(
            model.net.Proto(),
            set(["gpu_0/data", "gpu_0/last_out_L1000"]).union(
                set(model.net.Proto().external_input))
        )
        print("Optimization took {} secs".format(time.time() - t))
        count_after = count_blobs(optim_proto)
        num_shared_blobs = count_shared_blobs(optim_proto)

        self.assertTrue(count_after < count_before)
        print(count_after, count_before, num_shared_blobs)
        self.assertTrue(num_shared_blobs < 7 and num_shared_blobs > 0)

        # Run model and compare results
        workspace.RunNetOnce(model.param_init_net)
        data = np.random.rand(4, 3, 227, 227).astype(np.float32)

        workspace.FeedBlob("gpu_0/data", data)
        model.net.Proto().type = 'simple'

        workspace.RunNetOnce(model.net)
        loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")

        workspace.RunNetOnce(optim_proto)
        optimized_loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")
        np.testing.assert_almost_equal(loss1, optimized_loss1)
示例#5
0
def test_forward_only_fast_simplenet(
    create_model,
    last_out_blob,
    data_blob="gpu_0/data",
    num_labels=1000,
):
    model = cnn.CNNModelHelper(
        order="NCHW",
        name="test",
        cudnn_exhaustive_search=True,
    )
    with core.NameScope("gpu_0"):
        data = model.net.AddExternalInput(data_blob)
        create_model(model,
                     data,
                     num_input_channels=3,
                     num_labels=num_labels,
                     is_test=True)

    count_before = count_blobs(model.net.Proto())
    t = time.time()
    optim_proto = memonger.optimize_inference_fast(
        model.net.Proto(),
        set([data_blob,
             last_out_blob]).union(set(model.net.Proto().external_input)))
    print("Optimization took {} secs".format(time.time() - t))
    count_after = count_blobs(optim_proto)
    num_shared_blobs = count_shared_blobs(optim_proto)

    print(count_after, count_before, num_shared_blobs)

    # Run model and compare results
    workspace.RunNetOnce(model.param_init_net)
    data = np.random.rand(4, 3, 227, 227).astype(np.float32)

    workspace.FeedBlob(data_blob, data)
    model.net.Proto().type = 'simple'

    workspace.RunNetOnce(model.net)
    loss1 = workspace.FetchBlob(last_out_blob)

    workspace.RunNetOnce(optim_proto)
    optimized_loss1 = workspace.FetchBlob(last_out_blob)
    return [(count_after, count_before), (num_shared_blobs),
            (loss1, optimized_loss1)]
示例#6
0
    def test_fast_memonger_unique_outputs(self):
        m = model_helper.ModelHelper()
        fc = []
        for i in range(2):
            z = brew.fc(
                m, "data{}".format(i), "fc".format(i), dim_in=2, dim_out=2)
            fc.append(z)
        r = []
        # Trick is here to have same input appear twice in a same Sum
        for x in fc:
            for y in fc:
                r.append(brew.sum(m, [x, y], 1))
        concated = brew.concat(m, r, "concated")
        brew.relu(m, concated, "merged")

        static_blobs = \
            [o for op in m.param_init_net.Proto().op for o in op.output] + \
            ["merged"] + ["data{}".format(i) for i in range(len(fc))]

        optimized_net = memonger.optimize_inference_fast(
            m.Proto(), static_blobs)
        for op in optimized_net.op:
            self.assertEqual(len(op.output), len(set(op.output)), str(op))