Example #1
def AddLeNetModel(model, data):
    '''
    This part is the standard LeNet model: from data to the softmax prediction.

    For each convolutional layer we specify dim_in, the number of input
    channels, and dim_out, the number of output channels. Each Conv and
    MaxPool layer also changes the image size: a kernel of size 5 reduces
    each side of the image by 4, while a MaxPool layer with kernel and
    stride of 2 halves each side.
    '''
    # Image size: 28 x 28 -> 24 x 24
    conv1 = brew.conv(model, data, 'conv1', dim_in=1, dim_out=20, kernel=5)
    # Image size: 24 x 24 -> 12 x 12
    pool1 = model.net.MaxPool(conv1, 'pool1', kernel=2, stride=2)
    # Image size: 12 x 12 -> 8 x 8
    conv2 = brew.conv(model, pool1, 'conv2', dim_in=20, dim_out=50, kernel=5)
    # Image size: 8 x 8 -> 4 x 4
    pool2 = model.net.MaxPool(conv2, 'pool2', kernel=2, stride=2)
    # 50 * 4 * 4 is dim_out of the previous layer (50) times the 4 x 4 image size
    fc3 = brew.fc(model, pool2, 'fc3', dim_in=50 * 4 * 4, dim_out=500)
    fc3 = model.net.Relu(fc3, 'relu3')
    pred = brew.fc(model, fc3, 'pred', 500, 10)
    softmax = model.net.Softmax(pred, 'softmax')
    return softmax
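As a quick sanity check of the size arithmetic described in the docstring, here is a minimal sketch (plain Python, not part of the model) that reproduces the 28 -> 24 -> 12 -> 8 -> 4 progression and the fc3 input size:

def conv_out(side, kernel):
    # stride=1, no padding: each side shrinks by kernel - 1
    return side - (kernel - 1)

def pool_out(side, kernel):
    # kernel == stride: each side is divided by the kernel
    return side // kernel

side = pool_out(conv_out(28, 5), 2)    # 28 -> 24 -> 12
side = pool_out(conv_out(side, 5), 2)  # 12 -> 8 -> 4
assert 50 * side * side == 800         # dim_in of fc3 above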
Example #2
    def test_gradient_clipping_using_param_norm(self):
        model = model_helper.ModelHelper(name="test")
        data = model.net.AddExternalInput("data")
        fc1 = brew.fc(model, data, "fc1", dim_in=4, dim_out=2)

        # no operator name set, will use default
        fc2 = brew.fc(model, fc1, "fc2", dim_in=2, dim_out=1)

        sigm = model.net.Sigmoid(fc2, 'sigm')
        sq = model.net.SquaredL2Distance([sigm, 'label'], 'sq')
        loss = model.net.SumElements(sq, 'loss')

        grad_map = model.AddGradientOperators([loss])

        grad_map_for_param = {key: grad_map[key] for key in ['fc1_w', 'fc2_w']}

        net_modifier = GradientClipping(
            grad_clip_method='by_norm',
            clip_norm_type='l2_norm',
            clip_threshold=0.1,
            use_parameter_norm=True,
        )

        net_modifier(model.net, grad_map=grad_map_for_param)

        workspace.FeedBlob('data', np.random.rand(10, 4).astype(np.float32))
        workspace.FeedBlob('label', np.random.rand(10, 1).astype(np.float32))

        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(model.net)

        # 5 forward ops + 6 backward ops + 2 * (5 gradient clipping ops)
        self.assertEqual(len(model.net.Proto().op), 21)
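For intuition, a hedged NumPy sketch of clipping a gradient by its L2 norm; the use_parameter_norm option is assumed here to scale the threshold by the parameter's norm, which is one reading of the flag, not something this snippet confirms:

import numpy as np

def clip_by_l2_norm(grad, clip_threshold, param=None):
    # Assumed semantics: scale the threshold by ||param|| when a parameter
    # is given, then shrink the gradient if its L2 norm exceeds the threshold.
    if param is not None:
        clip_threshold = clip_threshold * np.linalg.norm(param)
    norm = np.linalg.norm(grad)
    return grad if norm <= clip_threshold else grad * (clip_threshold / norm)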
Example #3
def MLP(order, cudnn_ws, device):
    model = ModelHelper(name="benchmark")
    d = 256
    depth = 20
    width = 3
    for i in range(depth):
        for j in range(width):
            current = "fc_{}_{}".format(i, j) if i > 0 else "data"
            next_ = "fc_{}_{}".format(i + 1, j)
            brew.fc(
                model,
                current, next_,
                dim_in=d, dim_out=d,
                weight_init=('XavierFill', {}),
                bias_init=('XavierFill', {}))

    brew.sum(model, ["fc_{}_{}".format(depth, j) for j in range(width)], ["sum"])
    brew.fc(model, "sum", "last",
             dim_in=d, dim_out=1000,
             weight_init=('XavierFill', {}),
             bias_init=('XavierFill', {}))
    xent = model.LabelCrossEntropy(["last", "label"], "xent")
    if device != 'MKL':
        model.AveragedLoss(xent, "loss")
    return model, d
Example #4
    def test_fast_memonger(self, input_dim, output_dim, batch_size, do):
        m = model_helper.ModelHelper()
        fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
        fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
        fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)

        fc3.Relu([], fc3)\
           .Softmax([], "pred") \
           .LabelCrossEntropy(["label"], ["xent"]) \
           .AveragedLoss([], "loss")
        input_to_grad = m.AddGradientOperators(["loss"])
        m.net.Proto().device_option.CopyFrom(do)
        m.param_init_net.Proto().device_option.CopyFrom(do)
        static_blobs = \
            [o for op in m.param_init_net.Proto().op for o in op.output] + \
            ["data", "label", "loss", input_to_grad["fc1_w"]]

        optimized_net = memonger.optimize_inference_fast(
            m.Proto(), static_blobs)
        data = np.random.randn(batch_size, input_dim).astype(np.float32)
        label = np.random.randint(
            low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
        workspace.RunNetOnce(m.param_init_net)
        workspace.FeedBlob("data", data, device_option=do)
        workspace.FeedBlob("label", label, device_option=do)
        workspace.RunNetOnce(m.net)
        loss = workspace.FetchBlob("loss")
        grad = workspace.FetchBlob(str(input_to_grad["fc1_w"]))
        workspace.RunNetOnce(optimized_net)
        optimized_loss = workspace.FetchBlob("loss")
        optimized_grad = workspace.FetchBlob(str(input_to_grad["fc1_w"]))
        np.testing.assert_almost_equal(loss, optimized_loss)
        np.testing.assert_almost_equal(grad, optimized_grad)

        self.assertLess(count_blobs(optimized_net), count_blobs(m.Proto()))
Example #5
    def test_registry_invalid(self, input_dim, output_dim, batch_size):
        m = model_helper.ModelHelper()
        brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
        with self.assertRaises(RuntimeError):
            workspace.ApplyTransform(
                "definitely_not_a_real_transform",
                m.net.Proto())
Example #6
    def test_compute_statistics_for_blobs(self):
        model = model_helper.ModelHelper(name="test")
        data = model.net.AddExternalInput("data")
        fc1 = brew.fc(model, data, "fc1", dim_in=4, dim_out=2)

        # no operator name set, will use default
        brew.fc(model, fc1, "fc2", dim_in=2, dim_out=1)

        net_modifier = ComputeStatisticsForBlobs(
            blobs=['fc1_w', 'fc2_w'],
            logging_frequency=10,
        )

        net_modifier(model.net)

        workspace.FeedBlob('data', np.random.rand(10, 4).astype(np.float32))

        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(model.net)

        fc1_w = workspace.FetchBlob('fc1_w')
        fc1_w_summary = workspace.FetchBlob('fc1_w_summary')

        # std is unbiased here
        stats_ref = np.array([fc1_w.flatten().min(), fc1_w.flatten().max(),
                     fc1_w.flatten().mean(), fc1_w.flatten().std(ddof=1)])

        self.assertAlmostEqual(np.linalg.norm(stats_ref - fc1_w_summary), 0,
                               delta=1e-5)
        self.assertEqual(fc1_w_summary.size, 4)

        assert model.net.output_record() is None
Example #7
    def test_compute_averaged_norm_for_blobs(self):
        model = model_helper.ModelHelper(name="test")
        data = model.net.AddExternalInput("data")
        fc1 = brew.fc(model, data, "fc1", dim_in=4, dim_out=2)

        # no operator name set, will use default
        brew.fc(model, fc1, "fc2", dim_in=2, dim_out=1)

        net_modifier = ComputeNormForBlobs(
            blobs=['fc1_w', 'fc2_w'],
            logging_frequency=10,
            compute_averaged_norm=True,
        )

        net_modifier(model.net)

        workspace.FeedBlob('data', np.random.rand(10, 4).astype(np.float32))

        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(model.net)

        fc1_w = workspace.FetchBlob('fc1_w')
        fc1_w_l2_averaged_norm = workspace.FetchBlob('fc1_w_averaged_l2_norm')

        self.assertEqual(fc1_w_l2_averaged_norm.size, 1)
        self.assertAlmostEqual(fc1_w_l2_averaged_norm[0],
                               np.linalg.norm(fc1_w)**2 / fc1_w.size,
                               delta=1e-5)

        self.assertEqual(len(model.net.Proto().op), 8)
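The "averaged norm" asserted above is the squared L2 norm divided by the element count, i.e. the mean of the squared entries; a one-line NumPy check of that identity:

import numpy as np

w = np.arange(8, dtype=np.float32).reshape(4, 2)
assert np.isclose(np.linalg.norm(w) ** 2 / w.size, (w ** 2).mean())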
Example #8
    def testGPUDense(self, dtype=core.DataType.FLOAT):
        device_opt = core.DeviceOption(caffe2_pb2.CUDA, 0)
        with core.DeviceScope(device_opt):
            model, _perfect_model, data, label = self._createDense(dtype)
            if dtype == core.DataType.FLOAT16:
                fc_fp32_for_host = model.HalfToFloat('fc', 'fc_fp32_for_host')
                model.CopyGPUToCPU(fc_fp32_for_host, 'fc_cpu')
            else:
                model.CopyGPUToCPU('fc', 'fc_cpu')
            workspace.FeedBlob('data', data[0])
            workspace.FeedBlob('label', label[0])

        # Add some CPU ops
        brew.fc(model, 'fc_cpu', 'fc2', dim_in=1, dim_out=10, axis=0)

        # Create optimizer in default device scope
        self.build_optimizer(model)

        if self._skip_gpu:
            return

        # Run net to see it does not crash
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)
        workspace.RunNet(model.net.Proto().name)
Example #9
    def test_get_entry_from_blobs_modify_output_record(self):
        model = model_helper.ModelHelper(name="test")
        data = model.net.AddExternalInput("data")
        fc1 = brew.fc(model, data, "fc1", dim_in=4, dim_out=4)

        # no operator name set, will use default
        brew.fc(model, fc1, "fc2", dim_in=4, dim_out=4)
        i1, i2 = np.random.randint(4, size=2)
        net_modifier = GetEntryFromBlobs(
            blobs=['fc1_w', 'fc2_w'],
            logging_frequency=10,
            i1=i1,
            i2=i2,
        )
        net_modifier(model.net, modify_output_record=True)

        workspace.FeedBlob('data', np.random.rand(10, 4).astype(np.float32))
        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(model.net)

        fc1_w = workspace.FetchBlob('fc1_w')
        fc1_w_entry = workspace.FetchBlob('fc1_w_{0}_{1}'.format(i1, i2))

        self.assertEqual(fc1_w_entry.size, 1)
        self.assertEqual(fc1_w_entry[0], fc1_w[i1][i2])

        assert 'fc1_w' + net_modifier.field_name_suffix() in\
            model.net.output_record().field_blobs(),\
            model.net.output_record().field_blobs()
        assert 'fc2_w' + net_modifier.field_name_suffix() in\
            model.net.output_record().field_blobs(),\
            model.net.output_record().field_blobs()
Example #10
    def test_release_blobs_when_used(self):
        m = model_helper.ModelHelper()
        fc1 = brew.fc(m, "data", "x", dim_in=2, dim_out=2)
        fc2 = brew.fc(m, fc1, "y", dim_in=2, dim_out=2)
        fc3 = brew.fc(m, fc1, "z", dim_in=2, dim_out=2)
        fc4 = brew.fc(m, fc2, "u", dim_in=2, dim_out=2)
        m.net.Alias(["u"], ["u_alias"])

        brew.sum(m, [fc3, fc4], "out")

        with_frees = memonger.release_blobs_when_used(m.net.Proto(), {"data"})

        expect_frees = {"x", "y", "z"}  # out is external output
                                        # and u is aliased so cannot be freed
        found_frees = set()
        for op in with_frees.op:
            if op.type == "Free":
                self.assertFalse(op.input[0] in found_frees)  # no double frees
                found_frees.add(op.input[0])
            else:
                # Check a freed blob is not used anymore
                for inp in op.input:
                    self.assertFalse(inp in found_frees)
                for outp in op.output:
                    self.assertFalse(outp in found_frees)

        self.assertEqual(expect_frees, found_frees)
Example #11
    def test_optimizer_context(self):
        from caffe2.python import brew, optimizer
        from caffe2.python.model_helper import ModelHelper

        model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
        count = optimizer._optimizer_instance_count['SgdOptimizer']
        cnv_optim = SgdOptimizer(0.15)
        weight_optim = SgdOptimizer(0.2)
        bias_optim = SgdOptimizer(0.1)

        with UseOptimizer(cnv_optim):
            cnv = brew.conv(model, 'data', 'cnv', 32, 32, 4)
        with UseOptimizer({'WEIGHT': weight_optim, 'BIAS': bias_optim}):
            a = brew.fc(model, cnv, 'a', 100, 200)
        pred = brew.fc(model, a, 'b', 200, 5)
        (softmax, loss) = model.SoftmaxWithLoss(
            [pred, 'label'],
            ['softmax', 'loss'],
        )
        model.AddGradientOperators([loss])

        add_weight_decay(model, weight_decay=1e-4)
        # use the following optimizer if none specified in param_info
        build_sgd(model, 0.11)
        expected_weight_grad = {'b_w_grad', 'a_w_grad', 'cnv_w_grad'}
        expected_learning_rate = {
            "SgdOptimizer_{}_lr_cpu".format(count): -0.15,
            "SgdOptimizer_{}_lr_cpu".format(count + 1): -0.2,
            "SgdOptimizer_{}_lr_cpu".format(count + 2): -0.1,
            "SgdOptimizer_{}_lr_cpu".format(count + 3): -0.11
        }

        for op in model.net.Proto().op:
            # Check in the proto that all weights are decayed,
            # and that no non-weights are decayed.
            if op.type == 'WeightedSum' and 'wd_0_0' in op.input:
                if op.output[0] not in expected_weight_grad:
                    print(
                        "Unexpected param for weight_decay: {}".
                        format(op.output[0])
                    )
                self.assertTrue(op.output[0] in expected_weight_grad)
                expected_weight_grad.remove(op.output[0])
            # Check the learning rate for each parameter
            if op.type == 'LearningRate':
                val = 0
                for arg in op.arg:
                    if arg.name == 'base_lr':
                        val = arg.f
                self.assertAlmostEqual(
                    val,
                    expected_learning_rate[op.output[0]]
                )

        self.assertEqual(
            expected_weight_grad,
            set(),
            "Not all weights were decayed: {}".format(expected_weight_grad)
        )
Example #12
    def test_param_consistence(self):
        model = ModelHelper(name='test_mode')
        cnv = brew.conv(model, 'data', 'cnv', 32, 32, 4)
        step_model = ModelHelper(name='step_model', param_model=model)
        a = brew.fc(step_model, cnv, 'a', 100, 200)
        brew.fc(model, a, 'b', 200, 5)
        # test that _parameters_info is shared between model and step_model
        self.assertEqual(model._parameters_info, step_model._parameters_info)
Example #13
def model_build_fun(model, loss_scale):
    fc1 = brew.fc(model, "data", "fc1", dim_in=8, dim_out=8)
    fc2 = brew.fc(model, fc1, "fc2", dim_in=8, dim_out=8)
    fc3 = brew.fc(model, fc2, "fc3", dim_in=8, dim_out=8)
    fc4 = brew.fc(model, fc3, "fc4", dim_in=8, dim_out=8)
    fc5 = brew.fc(model, fc4, "fc5", dim_in=8, dim_out=8)
    loss = model.net.SumElements([fc5], ["loss"])
    return [loss]
Example #14
    def test_mobile_exporter(self):
        model = ModelHelper(name="mobile_exporter_test_model")
        # Test LeNet
        brew.conv(model, 'data', 'conv1', dim_in=1, dim_out=20, kernel=5)
        brew.max_pool(model, 'conv1', 'pool1', kernel=2, stride=2)
        brew.conv(model, 'pool1', 'conv2', dim_in=20, dim_out=50, kernel=5)
        brew.max_pool(model, 'conv2', 'pool2', kernel=2, stride=2)
        brew.fc(model, 'pool2', 'fc3', dim_in=50 * 4 * 4, dim_out=500)
        brew.relu(model, 'fc3', 'fc3')
        brew.fc(model, 'fc3', 'pred', 500, 10)
        brew.softmax(model, 'pred', 'out')

        # Create our mobile exportable networks
        workspace.RunNetOnce(model.param_init_net)
        init_net, predict_net = mobile_exporter.Export(
            workspace, model.net, model.params
        )

        # Populate the workspace with data
        np_data = np.random.rand(1, 1, 28, 28).astype(np.float32)
        workspace.FeedBlob("data", np_data)

        workspace.CreateNet(model.net)
        workspace.RunNet(model.net)
        ref_out = workspace.FetchBlob("out")

        # Clear the workspace
        workspace.ResetWorkspace()

        # Populate the workspace with data
        workspace.RunNetOnce(init_net)
        # Fake "data" is populated by init_net, we have to replace it
        workspace.FeedBlob("data", np_data)

        # Overwrite the old net
        workspace.CreateNet(predict_net, True)
        workspace.RunNet(predict_net.name)
        manual_run_out = workspace.FetchBlob("out")
        np.testing.assert_allclose(
            ref_out, manual_run_out, atol=1e-10, rtol=1e-10
        )

        # Clear the workspace
        workspace.ResetWorkspace()

        # Predictor interface test (simulates writing to disk)
        predictor = workspace.Predictor(
            init_net.SerializeToString(), predict_net.SerializeToString()
        )

        # Output is a vector of outputs but we only care about the first and only result
        predictor_out = predictor.run([np_data])
        assert len(predictor_out) == 1
        predictor_out = predictor_out[0]

        np.testing.assert_allclose(
            ref_out, predictor_out, atol=1e-10, rtol=1e-10
        )
Example #15
    def test_fc(self):
        m, n, k = (15, 15, 15)
        X = np.random.rand(m, k).astype(np.float32) - 0.5

        workspace.FeedBlob("x", X)
        model = ModelHelper(name="test_model")
        brew.fc(model, "x", "out_1", k, n)
        model.Validate()
        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(model.net)
Example #16
    def setUp(self):
        core.GlobalInit(["python", "caffe2"])
        ws.ResetWorkspace()
        self.model = model_helper.ModelHelper()
        brew.fc(self.model, "data", "y",
                    dim_in=4, dim_out=2,
                    weight_init=('ConstantFill', dict(value=1.0)),
                    bias_init=('ConstantFill', dict(value=0.0)),
                    axis=0)
        ws.FeedBlob("data", np.zeros([4], dtype='float32'))

        ws.RunNetOnce(self.model.param_init_net)
        ws.CreateNet(self.model.net)
Example #17
    def test_net_conversion_and_append_net(self):
        other = model_helper.ModelHelper()
        fc1 = brew.fc(other, "data", "other_fc1", dim_in=3*227*227, dim_out=10)
        fc2 = brew.fc(other, fc1, "other_fc2", dim_in=10, dim_out=10)
        brew.fc(other, fc2, "other_fc3", dim_in=10, dim_out=10)

        def add_input_ops(model):
            model.net.UniformFill([], ["data"], shape=[4, 227, 227, 3])
            model.net.UniformFill([], ["label"], shape=[4])

        def add_model_ops(model, loss_scale):
            model.NHWC2NCHW("data", "data_nchw")
            model.Conv("data_nchw", 'conv1', 3, 64,
                       weight_init=("MSRAFill", {}), kernel=7,
                       stride=2, pad=3, no_bias=0)
            model.SpatialBN('conv1', 'conv1_spatbn_relu', 64, epsilon=1e-3, is_test=False)
            model.Relu('conv1_spatbn_relu', 'conv1_spatbn_relu')
            model.MaxPool('conv1_spatbn_relu', 'pool1', kernel=3, stride=2)
            model.FC('pool1', 'fc', dim_in=(64 * 56 * 56), dim_out=10)

            # Append the net and param_init_net of the other model
            appendnet = data_parallel_model.ConvertNetForDevice(other.net)
            model.net.AppendNet(appendnet)

            model.param_init_net.AppendNet(
                data_parallel_model.ConvertNetForDevice(other.param_init_net))

            model.Sigmoid('fc', 'fc_sigm')
            model.Softmax('fc_sigm', 'softmax')
            loss = model.AveragedLoss('softmax', 'loss')
            return [loss]

        def add_optimizer(model):
            optimizer.build_sgd(model, 0.1, policy="fixed", momentum=0.9)

        model = cnn.CNNModelHelper(
            order="NCHW",
            name="test",
        )
        data_parallel_model.Parallelize_CPU(
            model,
            input_builder_fun=add_input_ops,
            forward_pass_builder_fun=add_model_ops,
            optimizer_builder_fun=add_optimizer,
            devices=range(4)
        )

        # Just create and run net and confirm no exception is thrown
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net)
        workspace.RunNet(model.net)
Example #18
    def test_extract_simple(self):
        from caffe2.python import brew
        from caffe2.python.model_helper import ModelHelper, ExtractPredictorNet

        model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
        [data, label] = brew.image_input(
            model,
            "reader", ["xx/data", "label"],
            is_test=1,
        )
        cnv = brew.conv(model, data, 'cnv', 32, 32, 4)
        a = brew.fc(model, cnv, 'a', 100, 200)
        pred = brew.fc(model, a, 'pred', 200, 5)
        brew.softmax(model, [pred, label], "softmax")

        (predict_net, export_blobs) = ExtractPredictorNet(
            net_proto=model.net.Proto(),
            input_blobs=["xx/data"],
            output_blobs=["pred"],
            renames={"xx/data": "image"},
        )
        export_blobs = set(export_blobs)

        ops = list(predict_net.Proto().op)
        for op in ops:
            self.assertFalse(op.type == "Softmax")
            self.assertFalse("xx/data" in op.input)

        # Note: image input should not be included
        self.assertEquals(ops[0].type, "Conv")
        self.assertEquals(ops[1].type, "FC")
        self.assertEquals(ops[2].type, "FC")
        self.assertEquals(len(ops), 3)

        # test rename happened
        self.assertEquals(ops[0].input[0], "image")

        # Check export blobs
        self.assertTrue("image" not in export_blobs)
        self.assertTrue("xx/data" not in export_blobs)
        self.assertEqual(set([str(p) for p in model.params]), export_blobs)

        # Check external inputs/outputs
        self.assertTrue("image" in predict_net.Proto().external_input)
        self.assertEquals(set(["pred"]), set(predict_net.Proto().external_output))
        self.assertEqual(
            set(predict_net.Proto().external_input) -
            set([str(p) for p in model.params]), set(["image"])
        )
Example #19
    def test_fc_fp16_initializer(self):
        model = model_helper.ModelHelper(name="test")
        data = model.net.AddExternalInput("data")
        fc1 = brew.fc(model, data, "fc1", dim_in=1, dim_out=1)

        # default operator, pFP16Initializer
        fc2 = brew.fc(model, fc1, "fc2", dim_in=1, dim_out=1,
                      WeightInitializer=pFP16Initializer
        )

        # specified operator, pFP16Initializer
        fc3 = brew.fc(model, fc2, "fc3", dim_in=1, dim_out=1,
                      weight_init=("ConstantFill", {}),
                      WeightInitializer=pFP16Initializer
        )
Example #20
    def _createDense(self, dtype=core.DataType.FLOAT):
        perfect_model = np.array([2, 6, 5, 0, 1]).astype(np.float32)
        np.random.seed(123)  # make test deterministic
        numpy_dtype = np.float32 if dtype == core.DataType.FLOAT else np.float16
        initializer = Initializer if dtype == core.DataType.FLOAT else \
            PseudoFP16Initializer
        data = np.random.randint(
            2,
            size=(20, perfect_model.size)).astype(numpy_dtype)
        label = np.dot(data, perfect_model)[:, np.newaxis]

        model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
        out = brew.fc(
            model,
            'data', 'fc', perfect_model.size, 1, ('ConstantFill', {}),
            ('ConstantFill', {}), axis=0,
            WeightInitializer=initializer, BiasInitializer=initializer
        )
        if dtype == core.DataType.FLOAT16:
            out = model.HalfToFloat(out, out + "_fp32")
        sq = model.SquaredL2Distance([out, 'label'])
        loss = model.AveragedLoss(sq, "avg_loss")
        grad_map = model.AddGradientOperators([loss])
        self.assertIsInstance(grad_map['fc_w'], core.BlobReference)
        return (model, perfect_model, data, label)
Example #21
def _calc_attention_logits_from_sum_match(
    model,
    decoder_hidden_encoder_outputs_sum,
    encoder_output_dim,
    scope,
):
    # [encoder_length, batch_size, encoder_output_dim]
    decoder_hidden_encoder_outputs_sum = model.net.Tanh(
        decoder_hidden_encoder_outputs_sum,
        decoder_hidden_encoder_outputs_sum,
    )

    # [encoder_length, batch_size, 1]
    attention_logits = brew.fc(
        model,
        decoder_hidden_encoder_outputs_sum,
        s(scope, 'attention_logits'),
        dim_in=encoder_output_dim,
        dim_out=1,
        axis=2,
        freeze_bias=True,
    )

    # [batch_size, encoder_length, 1]
    attention_logits_transposed = brew.transpose(
        model,
        attention_logits,
        s(scope, 'attention_logits_transposed'),
        axes=[1, 0, 2],
    )
    return attention_logits_transposed
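The axis=2 argument above tells FC to fold all leading dimensions into the batch, so a [encoder_length, batch_size, dim] input is treated as encoder_length * batch_size rows; an illustrative NumPy sketch of that coercion (not the Caffe2 implementation):

import numpy as np

T, B, D = 5, 3, 8          # encoder_length, batch_size, encoder_output_dim
x = np.random.randn(T, B, D).astype(np.float32)
w = np.random.randn(1, D).astype(np.float32)   # dim_out=1

# Coerce to 2-D [T*B, D], apply the linear map, restore the leading dims.
logits = (x.reshape(T * B, D) @ w.T).reshape(T, B, 1)
assert logits.shape == (T, B, 1)   # [batch_size reordering aside: encoder_length, batch_size, 1]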
Example #22
    def testShapeInferenceConvNet(self):
        model = model_helper.ModelHelper(name="convtest")
        model.NHWC2NCHW("data", "data_nchw")
        brew.conv(model, "data_nchw", 'conv1', 3, 64,
                   weight_init=("MSRAFill", {}), kernel=7,
                   stride=2, pad=3, no_bias=0)
        brew.spatial_bn(model, 'conv1', 'conv1_spatbn_relu', 64, epsilon=1e-3, is_test=False)
        brew.relu(model, 'conv1_spatbn_relu', 'conv1_spatbn_relu')
        brew.max_pool(model, 'conv1_spatbn_relu', 'pool1', kernel=3, stride=2)
        brew.fc(model, 'pool1', 'fc', dim_in=(64 * 56 * 56), dim_out=100)
        brew.dropout(model, 'fc', 'fc_drop', is_test=False)
        model.Sigmoid('fc_drop', 'fc_sigm')
        brew.softmax(model, 'fc_sigm', 'softmax')
        model.LabelCrossEntropy(['softmax', 'label'], 'xent')
        loss = model.AveragedLoss('xent', 'loss')

        model.AddGradientOperators([loss])

        LR = model.param_init_net.ConstantFill(
            [], 'LR', shape=[1], value=0.1
        )

        for param in model.GetParams():
            param_grad = model.param_to_grad[param]
            param_momentum = model.param_init_net.ConstantFill(
                [param], param + '_momentum', value=0.0
            )
            model.net.MomentumSGDUpdate(
                [param_grad, param_momentum, LR, param],
                [param_grad, param_momentum, param],
            )

        workspace.FeedBlob(
            "data",
            np.random.rand(16, 227, 227, 3).astype(np.float32),
        )
        workspace.FeedBlob(
            "label",
            (100 * np.random.rand(16)).astype(np.int32),
        )
        # Then do the automatic comparison test: run the net once to
        # initialize everything
        self.InferTensorRunAndCompare(model)
Example #23
    def test_forward_optim_tree_daggy(self, input_dim, output_dim, batch_size):
        m = model_helper.ModelHelper()
        m.Proto().type = "dag"
        m.Proto().num_workers = 4

        with core.NameScope("name_x"):
            fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
            fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)

            fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
            fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
            fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)

            # Branch
            fc3b = brew.fc(m, fc2, "fc3b", dim_in=output_dim, dim_out=output_dim)
            fc4b = brew.fc(m, fc3b, "fc4b", dim_in=output_dim, dim_out=output_dim)
            fc5b = brew.fc(m, fc4b, "fc5b", dim_in=output_dim, dim_out=output_dim)

            fc5sum = brew.sum(m, [fc5, fc5b], "fc5sum")

            # Apply ReLU to the merged branch output (in place)
            fc5sum.Relu([], fc5sum) \
               .Softmax([], "pred1") \
               .LabelCrossEntropy(["label"], ["xent1"]) \
               .AveragedLoss([], "loss1")
            fc6 = brew.fc(m, fc5, "fc6", dim_in=output_dim, dim_out=output_dim)
            fc6.Relu([], fc6) \
               .Softmax([], "pred2") \
               .LabelCrossEntropy(["label"], ["xent2"]) \
               .AveragedLoss([], "loss2")

        blobs_before = count_blobs(m.net.Proto())
        optim_proto = memonger.optimize_inference_for_dag(
            m.net, ["name_x/data"], "name_x"
        )
        blobs_after = count_blobs(optim_proto)
        self.assertLess(blobs_after, blobs_before)

        # Test networks produce exactly same results
        data = np.random.randn(batch_size, input_dim).astype(np.float32)
        label = np.random.randint(
            low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
        workspace.RunNetOnce(m.param_init_net)
        workspace.FeedBlob("name_x/data", data)
        workspace.FeedBlob("name_x/label", label)
        workspace.RunNetOnce(m.net)
        loss1 = workspace.FetchBlob("name_x/loss1")
        loss2 = workspace.FetchBlob("name_x/loss2")
        workspace.RunNetOnce(optim_proto)
        optimized_loss1 = workspace.FetchBlob("name_x/loss1")
        optimized_loss2 = workspace.FetchBlob("name_x/loss2")
        np.testing.assert_almost_equal(loss1, optimized_loss1)
        np.testing.assert_almost_equal(loss2, optimized_loss2)
Example #24
    def CreateModel(self):
        log.debug("Start training")
        model = model_helper.ModelHelper(name="char_rnn")

        input_blob, seq_lengths, hidden_init, cell_init, target = \
            model.net.AddExternalInputs(
                'input_blob',
                'seq_lengths',
                'hidden_init',
                'cell_init',
                'target',
            )

        hidden_output_all, self.hidden_output, _, self.cell_state = LSTM(
            model, input_blob, seq_lengths, (hidden_init, cell_init),
            self.D, self.hidden_size, scope="LSTM")
        output = brew.fc(
            model,
            hidden_output_all,
            None,
            dim_in=self.hidden_size,
            dim_out=self.D,
            axis=2
        )

        # axis is 2 because the first two dimensions are T (time) and
        # N (batch size); we treat them as one big batch of size T * N
        softmax = model.net.Softmax(output, 'softmax', axis=2)

        softmax_reshaped, _ = model.net.Reshape(
            softmax, ['softmax_reshaped', '_'], shape=[-1, self.D])

        # Create a copy of the current net. We will use it on the forward
        # pass where we don't need loss and backward operators
        self.forward_net = core.Net(model.net.Proto())

        xent = model.net.LabelCrossEntropy([softmax_reshaped, target], 'xent')
        # Loss is averaged both across the batch and through time.
        # That's why the learning rate below is multiplied by self.seq_length
        loss = model.net.AveragedLoss(xent, 'loss')
        model.AddGradientOperators([loss])

        # use the build_sgd helper to build an optimizer
        build_sgd(
            model,
            base_learning_rate=0.1 * self.seq_length,
            policy="step",
            stepsize=1,
            gamma=0.9999
        )

        self.model = model
        self.predictions = softmax
        self.loss = loss

        self.prepare_state = core.Net("prepare_state")
        self.prepare_state.Copy(self.hidden_output, hidden_init)
        self.prepare_state.Copy(self.cell_state, cell_init)
Example #25
    def _create_model(self):
        m = model_helper.ModelHelper()
        y = brew.fc(m, "data", "y",
                    dim_in=4, dim_out=2,
                    weight_init=('ConstantFill', dict(value=1.0)),
                    bias_init=('ConstantFill', dict(value=0.0)),
                    axis=0)
        m.net.AddExternalOutput(y)
        return m
Example #26
    def testShapeInferenceSimpleFC(self):
        m = model_helper.ModelHelper(name="test_model")

        brew.fc(m, "data", "fc1", dim_in=96, dim_out=32)
        brew.fc(m, "fc1", "fc2", dim_in=32, dim_out=55)

        (shapes, types) = workspace.InferShapesAndTypes(
            [m.param_init_net, m.net],
            {'data': [64, 96]}
        )

        self.assertEquals(shapes['data'], [64, 96])
        self.assertEquals(shapes['fc1_w'], [32, 96])
        self.assertEquals(shapes['fc1_b'], [32])
        self.assertEquals(shapes['fc1'], [64, 32])
        self.assertEquals(shapes['fc2_w'], [55, 32])
        self.assertEquals(shapes['fc2_b'], [55])
        self.assertEquals(shapes['fc2'], [64, 55])
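The asserts above encode Caffe2's FC parameter layout: the weight is [dim_out, dim_in] and the bias is [dim_out], so the op computes X @ W^T + b; a NumPy check of the shapes:

import numpy as np

X = np.zeros((64, 96), dtype=np.float32)
W = np.zeros((32, 96), dtype=np.float32)   # fc1_w: [dim_out, dim_in]
b = np.zeros(32, dtype=np.float32)         # fc1_b: [dim_out]
assert (X @ W.T + b).shape == (64, 32)     # fc1: [batch, dim_out]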
Example #27
def AddMLPModel(model, data):
    size = 28 * 28 * 1
    sizes = [size, size * 2, size * 2, 10]
    layer = data
    for i in range(len(sizes) - 1):
        layer = brew.fc(model, layer, 'dense_{}'.format(i), dim_in=sizes[i], dim_out=sizes[i + 1])
        layer = model.net.Relu(layer, 'relu_{}'.format(i))
    softmax = model.net.Softmax(layer, 'softmax')
    return softmax
Example #28
    def prepare_input(self, model, input_blob):
        return brew.fc(
            model,
            input_blob,
            self.scope('i2h'),
            dim_in=self.input_size,
            dim_out=3 * self.hidden_size,
            axis=2,
        )
Example #29
    def test_compute_histogram_for_blobs_modify_output_record(self):
        model = model_helper.ModelHelper(name="test")
        data = model.net.AddExternalInput("data")
        fc1 = brew.fc(model, data, "fc1", dim_in=4, dim_out=2)

        # no operator name set, will use default
        brew.fc(model, fc1, "fc2", dim_in=2, dim_out=1)

        num_buckets = 20
        lower_bound = 0.2
        upper_bound = 0.8
        accumulate = False
        net_modifier = ComputeHistogramForBlobs(blobs=['fc1_w', 'fc2_w'],
                                                logging_frequency=10,
                                                num_buckets=num_buckets,
                                                lower_bound=lower_bound,
                                                upper_bound=upper_bound,
                                                accumulate=accumulate)
        net_modifier(model.net, modify_output_record=True)

        workspace.FeedBlob('data', np.random.rand(10, 4).astype(np.float32))

        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(model.net)

        fc1_w = workspace.FetchBlob('fc1_w')
        fc1_w_curr_normalized_hist = workspace.FetchBlob('fc1_w_curr_normalized_hist')
        cur_hist, acc_hist = self.histogram(fc1_w,
                                            lower_bound=lower_bound,
                                            upper_bound=upper_bound,
                                            num_buckets=num_buckets)

        self.assertEqual(fc1_w_curr_normalized_hist.size, num_buckets + 2)
        self.assertAlmostEqual(np.linalg.norm(
            fc1_w_curr_normalized_hist - cur_hist), 0.0, delta=1e-5)
        self.assertEqual(len(model.net.Proto().op), 12)

        assert 'fc1_w' + net_modifier.field_name_suffix() in\
            model.net.output_record().field_blobs(),\
            model.net.output_record().field_blobs()
        assert 'fc2_w' + net_modifier.field_name_suffix() in\
            model.net.output_record().field_blobs(),\
            model.net.output_record().field_blobs()
Example #30
    def test_fc_initializer(self):
        model = model_helper.ModelHelper(name="test")
        data = model.net.AddExternalInput("data")
        fc1 = brew.fc(model, data, "fc1", dim_in=1, dim_out=1)

        # no operator name set, will use default
        fc2 = brew.fc(model, fc1, "fc2", dim_in=1, dim_out=1,
                      WeightInitializer=Initializer)

        # no operator name set, will use custom
        fc3 = brew.fc(model, fc2, "fc3", dim_in=1, dim_out=1,
                      WeightInitializer=Initializer,
                      weight_init=("ConstantFill", {}),
        )

        # operator name set, no initializer class set
        fc4 = brew.fc(model, fc3, "fc4", dim_in=1, dim_out=1,
                      WeightInitializer=None,
                      weight_init=("ConstantFill", {})
        )
Example #31
def Inception(order, cudnn_ws, model_path=""):
    my_arg_scope = {
        'order': order,
        'use_cudnn': True,
        'cudnn_exhaustive_search': True,
    }
    if cudnn_ws:
        my_arg_scope['ws_nbytes_limit'] = cudnn_ws
    model = model_helper.ModelHelper(
        name="inception",
        arg_scope=my_arg_scope,
    )
    conv1 = brew.conv(
        model,
        "data",
        "conv1",
        3,
        64,
        7,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        stride=2,
        pad=3,
    )
    relu1 = brew.relu(model, conv1, "conv1")
    pool1 = brew.max_pool(model, relu1, "pool1", kernel=3, stride=2, pad=1)
    conv2a = brew.conv(
        model, pool1, "conv2a", 64, 64, 1, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    conv2a = brew.relu(model, conv2a, conv2a)
    conv2 = brew.conv(
        model,
        conv2a,
        "conv2",
        64,
        192,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1,
    )
    relu2 = brew.relu(model, conv2, "conv2")
    pool2 = brew.max_pool(model, relu2, "pool2", kernel=3, stride=2, pad=1)
    # Inception modules
    inc3 = _InceptionModule(
        model, pool2, 192, "inc3", 64, [96, 128], [16, 32], 32
    )
    inc4 = _InceptionModule(
        model, inc3, 256, "inc4", 128, [128, 192], [32, 96], 64
    )
    pool5 = brew.max_pool(model, inc4, "pool5", kernel=3, stride=2, pad=1)
    inc5 = _InceptionModule(
        model, pool5, 480, "inc5", 192, [96, 208], [16, 48], 64
    )
    inc6 = _InceptionModule(
        model, inc5, 512, "inc6", 160, [112, 224], [24, 64], 64
    )
    inc7 = _InceptionModule(
        model, inc6, 512, "inc7", 128, [128, 256], [24, 64], 64
    )
    inc8 = _InceptionModule(
        model, inc7, 512, "inc8", 112, [144, 288], [32, 64], 64
    )
    inc9 = _InceptionModule(
        model, inc8, 528, "inc9", 256, [160, 320], [32, 128], 128
    )
    pool9 = brew.max_pool(model, inc9, "pool9", kernel=3, stride=2, pad=1)
    inc10 = _InceptionModule(
        model, pool9, 832, "inc10", 256, [160, 320], [32, 128], 128
    )
    inc11 = _InceptionModule(
        model, inc10, 832, "inc11", 384, [192, 384], [48, 128], 128
    )
    pool11 = brew.average_pool(model, inc11, "pool11", kernel=7, stride=1)
    fc = brew.fc(
        model, pool11, "fc", 1024, 1000, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    # It seems that Soumith's benchmark does not have softmax on top
    # for Inception. We will add it anyway so we can have a proper
    # backward pass.
    pred = brew.softmax(model, fc, "pred")
    xent = model.net.LabelCrossEntropy([pred, "label"], "xent")
    model.net.AveragedLoss(xent, "loss")
    return model, 224
Example #32
def Add_Original_CIFAR10_Model(model, data, num_classes, image_height,
                               image_width, image_channels):
    # Convolutional layer 1
    conv1 = brew.conv(model,
                      data,
                      'conv1',
                      dim_in=image_channels,
                      dim_out=32,
                      kernel=5,
                      stride=1,
                      pad=2)
    h, w = update_dims(height=image_height,
                       width=image_width,
                       kernel=5,
                       stride=1,
                       pad=2)
    # Pooling layer 1
    pool1 = brew.max_pool(model, conv1, 'pool1', kernel=3, stride=2)
    h, w = update_dims(height=h, width=w, kernel=3, stride=2, pad=0)
    # ReLU layer 1
    relu1 = brew.relu(model, pool1, 'relu1')

    # Convolutional layer 2
    conv2 = brew.conv(model,
                      relu1,
                      'conv2',
                      dim_in=32,
                      dim_out=32,
                      kernel=5,
                      stride=1,
                      pad=2)
    h, w = update_dims(height=h, width=w, kernel=5, stride=1, pad=2)
    # ReLU layer 2
    relu2 = brew.relu(model, conv2, 'relu2')
    # Pooling layer 2
    pool2 = brew.average_pool(model, relu2, 'pool2', kernel=3, stride=2)
    h, w = update_dims(height=h, width=w, kernel=3, stride=2, pad=0)

    # Convolutional layer 3
    conv3 = brew.conv(model,
                      pool2,
                      'conv3',
                      dim_in=32,
                      dim_out=64,
                      kernel=5,
                      stride=1,
                      pad=2)
    h, w = update_dims(height=h, width=w, kernel=5, stride=1, pad=2)
    # ReLU layer 3
    relu3 = brew.relu(model, conv3, 'relu3')
    # Pooling layer 3
    pool3 = brew.average_pool(model, relu3, 'pool3', kernel=3, stride=2)
    h, w = update_dims(height=h, width=w, kernel=3, stride=2, pad=0)

    # Fully connected layers
    fc1 = brew.fc(model, pool3, 'fc1', dim_in=64 * h * w, dim_out=64)
    fc2 = brew.fc(model, fc1, 'fc2', dim_in=64, dim_out=num_classes)

    # Softmax layer
    softmax = brew.softmax(model, fc2, 'softmax')
    return softmax
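update_dims isn't defined in this snippet; a minimal sketch consistent with how it is called above, using the standard convolution/pooling output-size formula (an assumption, not the original helper):

def update_dims(height, width, kernel, stride, pad):
    # floor((in + 2*pad - kernel) / stride) + 1, applied to both sides
    h = (height + 2 * pad - kernel) // stride + 1
    w = (width + 2 * pad - kernel) // stride + 1
    return h, w

# e.g. for a 32 x 32 CIFAR-10 image: conv (kernel=5, stride=1, pad=2)
# keeps 32 x 32, and pool (kernel=3, stride=2, pad=0) gives 15 x 15
assert update_dims(32, 32, 5, 1, 2) == (32, 32)
assert update_dims(32, 32, 3, 2, 0) == (15, 15)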
Example #33
    def test_net_conversion_and_append_net(self):
        other = model_helper.ModelHelper()
        fc1 = brew.fc(other,
                      "data",
                      "other_fc1",
                      dim_in=3 * 227 * 227,
                      dim_out=10)
        fc2 = brew.fc(other, fc1, "other_fc2", dim_in=10, dim_out=10)
        brew.fc(other, fc2, "other_fc3", dim_in=10, dim_out=10)

        def add_input_ops(model):
            model.net.UniformFill([], ["data"], shape=[4, 227, 227, 3])
            model.net.UniformFill([], ["label"], shape=[4])

        def add_model_ops(model, loss_scale):
            model.NHWC2NCHW("data", "data_nchw")
            model.Conv("data_nchw",
                       'conv1',
                       3,
                       64,
                       weight_init=("MSRAFill", {}),
                       kernel=7,
                       stride=2,
                       pad=3,
                       no_bias=0)
            model.SpatialBN('conv1',
                            'conv1_spatbn_relu',
                            64,
                            epsilon=1e-3,
                            is_test=False)
            model.Relu('conv1_spatbn_relu', 'conv1_spatbn_relu')
            model.MaxPool('conv1_spatbn_relu', 'pool1', kernel=3, stride=2)
            model.FC('pool1', 'fc', dim_in=(64 * 56 * 56), dim_out=10)

            # Append the net and param_init_net of the other model
            appendnet = data_parallel_model.ConvertNetForDevice(other.net)
            model.net.AppendNet(appendnet)

            model.param_init_net.AppendNet(
                data_parallel_model.ConvertNetForDevice(other.param_init_net))

            model.Sigmoid('fc', 'fc_sigm')
            model.Softmax('fc_sigm', 'softmax')
            loss = model.AveragedLoss('softmax', 'loss')
            return [loss]

        def add_optimizer(model):
            optimizer.build_sgd(model, 0.1, policy="fixed", momentum=0.9)

        model = cnn.CNNModelHelper(
            order="NCHW",
            name="test",
        )
        data_parallel_model.Parallelize_CPU(
            model,
            input_builder_fun=add_input_ops,
            forward_pass_builder_fun=add_model_ops,
            optimizer_builder_fun=add_optimizer,
            devices=range(4))

        # Just create and run net and confirm no exception is thrown
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net)
        workspace.RunNet(model.net)
Example #34
def add_osme_branch(model, config, feature, seq):
    ''' add OSME module
    model net architecture:
        feature -> avgpool -> fc[1] -> relu -> fc[2] -> sigmoid ->
        mul -> (avgpool -> fc[3] ->) attention
    '''
    # set weight initialization method
    init_policy = "XavierFill" if seq == 1 else "MSRAFill"
    # GAP
    avg_pool = brew.average_pool(
        model,
        feature,
        'osme_GAP1_{}'.format(seq),
        kernel=config['model_arch']['last_conv_size'],
        stride=1,
    )

    # fc1 (needs a different initializer)
    fc1 = brew.fc(
        model,
        avg_pool,
        'osme_fc1_{}'.format(seq),
        dim_in=config['model_arch']['feature_dim'],
        dim_out=config['model_arch']['feature_dim'] // config['model_arch']['r'],
        weight_init=(init_policy, {}),
    )

    # relu
    fc1_relu = brew.relu(model, fc1, fc1)
    # fc2
    fc2 = brew.fc(
        model,
        fc1_relu,
        'osme_fc2_{}'.format(seq),
        dim_in=config['model_arch']['feature_dim'] // config['model_arch']['r'],
        dim_out=config['model_arch']['feature_dim'],
        weight_init=(init_policy, {}),
    )

    # sigmoid
    mask = model.net.Sigmoid(fc2, 'osme_mask_{}'.format(seq))
    # channel-wise mul
    attention = model.net.Mul(
        [feature, mask],
        ['osme_excitation_{}'.format(seq)],
        broadcast=1, axis=0,
    )

    # one more GAP
    attention_gap = brew.average_pool(
        model,
        attention,
        'osme_GAP2_{}'.format(seq),
        kernel=config['model_arch']['last_conv_size'],
        stride=1,
    )
    # fc3
    att_feature = brew.fc(
        model,
        attention_gap,
        'attention_{}'.format(seq),
        dim_in=config['model_arch']['feature_dim'],
        dim_out=config['model_arch']['attention_dim'],
        weight_init=(init_policy, {}),
    )

    return att_feature
Example #35
def create_vgg(
    model,
    data,
    num_input_channels,
    num_labels,
    num_layers=11,
    is_test=False,
):

    if num_layers == 11:  # VGG configuration A
        first_layers_count = 1
        last_layers_count = 2
    elif num_layers == 13:  # VGG configuration B
        first_layers_count = 2
        last_layers_count = 2
    elif num_layers == 16:  # VGG configuration D
        first_layers_count = 2
        last_layers_count = 3
    elif num_layers == 19:  # VGG configuration E
        first_layers_count = 2
        last_layers_count = 4
    else:
        raise NotImplementedError(
            "not currently supported: try one of {11, 13, 16, 19}, corresponding to VGG A, B, D, and E."
        )

    conv1 = brew.conv(
        model,
        data,
        "conv1",
        num_input_channels,
        64,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1,
    )
    relu1 = brew.relu(model, conv1, "conv1")
    for i in range(0, first_layers_count - 1):
        conv1 = brew.conv(
            model,
            relu1,
            "conv1{}".format(i),
            64,
            64,
            3,
            ('XavierFill', {}),
            ('ConstantFill', {}),
            pad=1,
        )
        relu1 = brew.relu(model, conv1, "conv1{}".format(i))

    pool1 = brew.max_pool(model, relu1, "pool1", kernel=2, stride=2)
    conv2 = brew.conv(
        model,
        pool1,
        "conv2",
        64,
        128,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1,
    )
    relu2 = brew.relu(model, conv2, "conv2")
    for i in range(0, first_layers_count - 1):
        conv2 = brew.conv(
            model,
            relu2,
            "conv2{}".format(i),
            128,
            128,
            3,
            ('XavierFill', {}),
            ('ConstantFill', {}),
            pad=1,
        )
        relu2 = brew.relu(model, conv2, "conv2{}".format(i))

    pool2 = brew.max_pool(model, relu2, "pool2", kernel=2, stride=2)
    conv3 = brew.conv(
        model,
        pool2,
        "conv3",
        128,
        256,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1,
    )
    relu3 = brew.relu(model, conv3, "conv3")
    for i in range(0, last_layers_count - 1):
        conv4 = brew.conv(
            model,
            relu3,
            "conv4{}".format(i),
            256,
            256,
            3,
            ('XavierFill', {}),
            ('ConstantFill', {}),
            pad=1,
        )
        relu4 = brew.relu(model, conv4, "conv4{}".format(i))
    pool4 = brew.max_pool(model, relu4, "pool4", kernel=2, stride=2)
    conv5 = brew.conv(
        model,
        pool4,
        "conv5",
        256,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1,
    )
    relu5 = brew.relu(model, conv5, "conv5")
    for i in range(0, last_layers_count - 1):
        conv6 = brew.conv(
            model,
            relu5,
            "conv6{}".format(i),
            512,
            512,
            3,
            ('XavierFill', {}),
            ('ConstantFill', {}),
            pad=1,
        )
        relu6 = brew.relu(model, conv6, "conv6{}".format(i))
    pool6 = brew.max_pool(model, relu6, "pool6", kernel=2, stride=2)
    conv7 = brew.conv(
        model,
        pool6,
        "conv7",
        512,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1,
    )
    relu7 = brew.relu(model, conv7, "conv7")
    for i in range(0, last_layers_count - 1):
        conv8 = brew.conv(
            model,
            relu7,
            "conv8{}".format(i),
            512,
            512,
            3,
            ('XavierFill', {}),
            ('ConstantFill', {}),
            pad=1,
        )
        relu8 = brew.relu(model, conv8, "conv8{}".format(i))
    pool8 = brew.max_pool(model, relu8, "pool8", kernel=2, stride=2)

    fcix = brew.fc(model, pool8, "fcix", 512 * 7 * 7, 4096, ('XavierFill', {}),
                   ('ConstantFill', {}))
    reluix = brew.relu(model, fcix, "fcix")
    fcx = brew.fc(model, reluix, "fcx", 4096, 4096, ('XavierFill', {}),
                  ('ConstantFill', {}))
    relux = brew.relu(model, fcx, "fcx")
    fcxi = brew.fc(model, relux, "fcxi", 4096, num_labels, ('XavierFill', {}),
                   ('ConstantFill', {}))

    return fcxi
Example #36
def simple_fc():
    model = ModelHelper(name="r")
    brew.fc(model, "data", "fc", 10, 10)
    return model, [(1, 10)]
Example #37
def AlexNet(order, cudnn_ws, ideep):
    my_arg_scope = {
        'order': order,
        'use_cudnn': True,
        'cudnn_exhaustive_search': True,
        'ws_nbytes_limit': str(cudnn_ws)
    }
    model = ModelHelper(name="alexnet", arg_scope=my_arg_scope)
    conv1 = brew.conv(model,
                      "data",
                      "conv1",
                      3,
                      64,
                      11, ('XavierFill', {}), ('ConstantFill', {}),
                      stride=4,
                      pad=2)
    relu1 = brew.relu(model, conv1, "conv1")
    pool1 = brew.max_pool(model, relu1, "pool1", kernel=3, stride=2)
    conv2 = brew.conv(model,
                      pool1,
                      "conv2",
                      64,
                      192,
                      5, ('XavierFill', {}), ('ConstantFill', {}),
                      pad=2)
    relu2 = brew.relu(model, conv2, "conv2")
    pool2 = brew.max_pool(model, relu2, "pool2", kernel=3, stride=2)
    conv3 = brew.conv(model,
                      pool2,
                      "conv3",
                      192,
                      384,
                      3, ('XavierFill', {}), ('ConstantFill', {}),
                      pad=1)
    relu3 = brew.relu(model, conv3, "conv3")
    conv4 = brew.conv(model,
                      relu3,
                      "conv4",
                      384,
                      256,
                      3, ('XavierFill', {}), ('ConstantFill', {}),
                      pad=1)
    relu4 = brew.relu(model, conv4, "conv4")
    conv5 = brew.conv(model,
                      relu4,
                      "conv5",
                      256,
                      256,
                      3, ('XavierFill', {}), ('ConstantFill', {}),
                      pad=1)
    relu5 = brew.relu(model, conv5, "conv5")
    pool5 = brew.max_pool(model, relu5, "pool5", kernel=3, stride=2)
    fc6 = brew.fc(model, pool5, "fc6", 256 * 6 * 6, 4096, ('XavierFill', {}),
                  ('ConstantFill', {}))
    relu6 = brew.relu(model, fc6, "fc6")
    fc7 = brew.fc(model, relu6, "fc7", 4096, 4096, ('XavierFill', {}),
                  ('ConstantFill', {}))
    relu7 = brew.relu(model, fc7, "fc7")
    fc8 = brew.fc(model, relu7, "fc8", 4096, 1000, ('XavierFill', {}),
                  ('ConstantFill', {}))
    pred = brew.softmax(model, fc8, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    loss = model.AveragedLoss(xent, "loss")
    return model, 224
Example #38
def OverFeat(order, cudnn_ws, model_path=""):
    my_arg_scope = {
        'order': order,
        'use_cudnn': True,
        'cudnn_exhaustive_search': True,
    }
    if cudnn_ws:
        my_arg_scope['ws_nbytes_limit'] = cudnn_ws
    model = model_helper.ModelHelper(
        name="overfeat",
        arg_scope=my_arg_scope,
    )
    conv1 = brew.conv(
        model,
        "data",
        "conv1",
        3,
        96,
        11,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        stride=4,
    )
    relu1 = brew.relu(model, conv1, "conv1")
    pool1 = brew.max_pool(model, relu1, "pool1", kernel=2, stride=2)
    conv2 = brew.conv(
        model, pool1, "conv2", 96, 256, 5, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    relu2 = brew.relu(model, conv2, "conv2")
    pool2 = brew.max_pool(model, relu2, "pool2", kernel=2, stride=2)
    conv3 = brew.conv(
        model,
        pool2,
        "conv3",
        256,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1,
    )
    relu3 = brew.relu(model, conv3, "conv3")
    conv4 = brew.conv(
        model,
        relu3,
        "conv4",
        512,
        1024,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1,
    )
    relu4 = brew.relu(model, conv4, "conv4")
    conv5 = brew.conv(
        model,
        relu4,
        "conv5",
        1024,
        1024,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1,
    )
    relu5 = brew.relu(model, conv5, "conv5")
    pool5 = brew.max_pool(model, relu5, "pool5", kernel=2, stride=2)
    fc6 = brew.fc(
        model, pool5, "fc6", 1024 * 6 * 6, 3072, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    relu6 = brew.relu(model, fc6, "fc6")
    fc7 = brew.fc(
        model, relu6, "fc7", 3072, 4096, ('XavierFill', {}), ('ConstantFill', {})
    )
    relu7 = brew.relu(model, fc7, "fc7")
    fc8 = brew.fc(
        model, relu7, "fc8", 4096, 1000, ('XavierFill', {}), ('ConstantFill', {})
    )
    pred = brew.softmax(model, fc8, "pred")
    xent = model.net.LabelCrossEntropy([pred, "label"], "xent")
    model.net.AveragedLoss(xent, "loss")
    return model, 231
Example #39
    def FC(self, *args, **kwargs):
        return brew.fc(self, *args, **kwargs)
Example #40
def VGGA(order, cudnn_ws, model_path=""):
    my_arg_scope = {
        'order': order,
        'use_cudnn': True,
        'cudnn_exhaustive_search': True,
    }
    if cudnn_ws:
        my_arg_scope['ws_nbytes_limit'] = cudnn_ws
    model = model_helper.ModelHelper(
        name="vgga",
        arg_scope=my_arg_scope,
    )
    conv1 = brew.conv(
        model,
        "data",
        "conv1",
        3,
        64,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1,
    )
    relu1 = brew.relu(model, conv1, "conv1")
    pool1 = brew.max_pool(model, relu1, "pool1", kernel=2, stride=2)
    conv2 = brew.conv(
        model,
        pool1,
        "conv2",
        64,
        128,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1,
    )
    relu2 = brew.relu(model, conv2, "conv2")
    pool2 = brew.max_pool(model, relu2, "pool2", kernel=2, stride=2)
    conv3 = brew.conv(
        model,
        pool2,
        "conv3",
        128,
        256,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1,
    )
    relu3 = brew.relu(model, conv3, "conv3")
    conv4 = brew.conv(
        model,
        relu3,
        "conv4",
        256,
        256,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1,
    )
    relu4 = brew.relu(model, conv4, "conv4")
    pool4 = brew.max_pool(model, relu4, "pool4", kernel=2, stride=2)
    conv5 = brew.conv(
        model,
        pool4,
        "conv5",
        256,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1,
    )
    relu5 = brew.relu(model, conv5, "conv5")
    conv6 = brew.conv(
        model,
        relu5,
        "conv6",
        512,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1,
    )
    relu6 = brew.relu(model, conv6, "conv6")
    pool6 = brew.max_pool(model, relu6, "pool6", kernel=2, stride=2)
    conv7 = brew.conv(
        model,
        pool6,
        "conv7",
        512,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1,
    )
    relu7 = brew.relu(model, conv7, "conv7")
    conv8 = brew.conv(
        model,
        relu7,
        "conv8",
        512,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1,
    )
    relu8 = brew.relu(model, conv8, "conv8")
    pool8 = brew.max_pool(model, relu8, "pool8", kernel=2, stride=2)

    fcix = brew.fc(
        model, pool8, "fcix", 512 * 7 * 7, 4096, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    reluix = brew.relu(model, fcix, "fcix")
    fcx = brew.fc(
        model, reluix, "fcx", 4096, 4096, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    relux = brew.relu(model, fcx, "fcx")
    fcxi = brew.fc(
        model, relux, "fcxi", 4096, 1000, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    pred = brew.softmax(model, fcxi, "pred")
    xent = model.net.LabelCrossEntropy([pred, "label"], "xent")
    model.net.AveragedLoss(xent, "loss")
    return model, 231
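
# Usage sketch (not part of the original example): building VGGA and running a
# single forward pass on random NCHW data. The 231 input size is the value
# VGGA returns; the batch size of 4 is an arbitrary assumption. Note the model
# sets use_cudnn=True, so running it requires a CUDA/cuDNN build of Caffe2.
from caffe2.python import workspace
import numpy as np

vgg_model, vgg_input_size = VGGA("NCHW", cudnn_ws=None)
workspace.FeedBlob(
    "data",
    np.random.rand(4, 3, vgg_input_size, vgg_input_size).astype(np.float32))
workspace.FeedBlob("label", np.random.randint(1000, size=4).astype(np.int32))
workspace.RunNetOnce(vgg_model.param_init_net)
workspace.RunNetOnce(vgg_model.net)
print(workspace.FetchBlob("loss"))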
Example No. 41
    def _apply(
        self,
        model,
        input_t,
        seq_lengths,
        states,
        timestep,
        extra_inputs=None,
    ):
        hidden_t_prev = states[0]

        # Split input tensors to get inputs for each gate.
        input_t_reset, input_t_update, input_t_output = model.net.Split(
            [
                input_t,
            ],
            [
                self.scope('input_t_reset'),
                self.scope('input_t_update'),
                self.scope('input_t_output'),
            ],
            axis=2,
        )

        # Fully connected layers for reset and update gates.
        reset_gate_t = brew.fc(
            model,
            hidden_t_prev,
            self.scope('reset_gate_t'),
            dim_in=self.hidden_size,
            dim_out=self.hidden_size,
            axis=2,
        )
        update_gate_t = brew.fc(
            model,
            hidden_t_prev,
            self.scope('update_gate_t'),
            dim_in=self.hidden_size,
            dim_out=self.hidden_size,
            axis=2,
        )

        # Calculating the modified hidden state going into output gate.
        reset_gate_t = model.net.Sum(
            [reset_gate_t, input_t_reset],
            self.scope('reset_gate_t')
        )
        reset_gate_t_sigmoid = model.net.Sigmoid(
            reset_gate_t,
            self.scope('reset_gate_t_sigmoid')
        )

        # `self.linear_before_reset = True` matches cudnn semantics
        if self.linear_before_reset:
            output_gate_fc = brew.fc(
                model,
                hidden_t_prev,
                self.scope('output_gate_t'),
                dim_in=self.hidden_size,
                dim_out=self.hidden_size,
                axis=2,
            )
            output_gate_t = model.net.Mul(
                [reset_gate_t_sigmoid, output_gate_fc],
                self.scope('output_gate_t_mul')
            )
        else:
            modified_hidden_t_prev = model.net.Mul(
                [reset_gate_t_sigmoid, hidden_t_prev],
                self.scope('modified_hidden_t_prev')
            )
            output_gate_t = brew.fc(
                model,
                modified_hidden_t_prev,
                self.scope('output_gate_t'),
                dim_in=self.hidden_size,
                dim_out=self.hidden_size,
                axis=2,
            )

        # Add input contributions to update and output gate.
        # We already (in-place) added input contributions to the reset gate.
        update_gate_t = model.net.Sum(
            [update_gate_t, input_t_update],
            self.scope('update_gate_t'),
        )
        output_gate_t = model.net.Sum(
            [output_gate_t, input_t_output],
            self.scope('output_gate_t_summed'),
        )

        # Join gate outputs and add input contributions
        gates_t, _gates_t_concat_dims = model.net.Concat(
            [
                reset_gate_t,
                update_gate_t,
                output_gate_t,
            ],
            [
                self.scope('gates_t'),
                self.scope('_gates_t_concat_dims'),
            ],
            axis=2,
        )

        if seq_lengths is not None:
            inputs = [hidden_t_prev, gates_t, seq_lengths, timestep]
        else:
            inputs = [hidden_t_prev, gates_t, timestep]

        hidden_t = model.net.GRUUnit(
            inputs,
            list(self.get_state_names()),
            forget_bias=self.forget_bias,
            drop_states=self.drop_states,
            sequence_lengths=(seq_lengths is not None),
        )
        model.net.AddExternalOutputs(hidden_t)
        return (hidden_t,)
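
# For reference, the gate arithmetic assembled above condensed into one numpy
# step. This is a minimal sketch assuming the cuDNN-style `linear_before_reset`
# convention mentioned in the code; the names are illustrative, not Caffe2 API.
import numpy as np

def _sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def gru_step(x_r, x_u, x_o, h_prev, U_r, U_u, U_o, b_o):
    # x_r, x_u, x_o: the three per-gate input projections (the Split outputs)
    r = _sigmoid(x_r + h_prev @ U_r)              # reset gate
    u = _sigmoid(x_u + h_prev @ U_u)              # update gate
    # linear_before_reset: the reset gate scales the *projected* hidden state
    o = np.tanh(x_o + r * (h_prev @ U_o + b_o))   # candidate state
    return u * h_prev + (1.0 - u) * o             # interpolated new hidden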
Example No. 42
def addModel(model, data):
    channels = 50
    channels2 = 200
    kernel_size = 3
    if model.init_params:
        weight = model.param_init_net.XavierFill(
            [], 'conv1' + '_w', shape=[channels, 1, 1, kernel_size])
        bias = model.param_init_net.ConstantFill([],
                                                 'conv1' + '_b',
                                                 shape=[
                                                     channels,
                                                 ])
    else:
        weight = core.ScopedBlobReference('conv1' + '_w', model.param_init_net)
        bias = core.ScopedBlobReference('conv1' + '_b', model.param_init_net)

    model.params.extend([weight, bias])
    model.weights.append(weight)
    model.biases.append(bias)

    conv1 = model.net.Conv([data, weight, bias],
                           'conv1',
                           dim_in=1,
                           dim_out=channels,
                           kernel_h=1,
                           kernel_w=kernel_size)
    #conv1 = brew.conv(model, data, 'conv1', 1, 2, 5)
    pool1 = brew.max_pool(model,
                          conv1,
                          'pool1',
                          kernel_h=1,
                          kernel_w=2,
                          stride=2)
    pool_dim_out = (41 - kernel_size) // 2  # integer division: fc dims must be int
    if model.init_params:
        weight2 = model.param_init_net.XavierFill(
            [], 'conv2' + '_w', shape=[channels2, channels, 1, kernel_size])
        bias2 = model.param_init_net.ConstantFill([],
                                                  'conv2' + '_b',
                                                  shape=[
                                                      channels2,
                                                  ])
    else:
        weight2 = core.ScopedBlobReference('conv2' + '_w',
                                           model.param_init_net)
        bias2 = core.ScopedBlobReference('conv2' + '_b', model.param_init_net)

    model.params.extend([weight2, bias2])
    model.weights.append(weight2)
    model.biases.append(bias2)
    conv2 = model.net.Conv([pool1, weight2, bias2],
                           'conv2',
                           dim_in=channels,
                           dim_out=channels2,
                           kernel_h=1,
                           kernel_w=kernel_size)
    pool2 = brew.max_pool(model,
                          conv2,
                          'pool2',
                          kernel_h=1,
                          kernel_w=2,
                          stride=2)
    pool_dim_out_2 = (pool_dim_out + 1 - kernel_size) // 2
    fc3 = brew.fc(model,
                  pool2,
                  'fc3',
                  dim_in=pool_dim_out_2 * channels2,
                  dim_out=1000)
    fc3 = brew.relu(model, fc3, fc3)
    pred = brew.fc(model, fc3, 'pred', 1000, 2)
    #print(workspace.FetchBlob('pred_w'))
    softmax = brew.softmax(model, pred, 'softmax')
    return softmax
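
# Usage sketch (assumption: the network expects an N x 1 x 1 x 41 input blob,
# inferred from the `41 - kernel_size` arithmetic above; batch size 8 is
# arbitrary, and the example's own imports of brew and core are in scope).
from caffe2.python import model_helper, workspace
import numpy as np

model = model_helper.ModelHelper(name="conv1d_example")
addModel(model, "data")
workspace.FeedBlob("data", np.random.rand(8, 1, 1, 41).astype(np.float32))
workspace.RunNetOnce(model.param_init_net)
workspace.RunNetOnce(model.net)
print(workspace.FetchBlob("softmax").shape)  # expected: (8, 2)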
Example No. 43
def create_resnet50(
    model,
    data,
    num_input_channels,
    num_labels,
    label=None,
    is_test=False,
    no_loss=False,
    no_bias=0,
    conv1_kernel=7,
    conv1_stride=2,
    final_avg_kernel=7,
):
    # conv1 + maxpool
    brew.conv(model,
              data,
              'conv1',
              num_input_channels,
              64,
              weight_init=("MSRAFill", {}),
              kernel=conv1_kernel,
              stride=conv1_stride,
              pad=3,
              no_bias=no_bias)

    brew.spatial_bn(model,
                    'conv1',
                    'conv1_spatbn_relu',
                    64,
                    epsilon=1e-3,
                    momentum=0.1,
                    is_test=is_test)
    brew.relu(model, 'conv1_spatbn_relu', 'conv1_spatbn_relu')
    brew.max_pool(model, 'conv1_spatbn_relu', 'pool1', kernel=3, stride=2)

    # Residual blocks...
    builder = ResNetBuilder(model,
                            'pool1',
                            no_bias=no_bias,
                            is_test=is_test,
                            spatial_bn_mom=0.1)

    # conv2_x (ref Table 1 in He et al. (2015))
    builder.add_bottleneck(64, 64, 256)
    builder.add_bottleneck(256, 64, 256)
    builder.add_bottleneck(256, 64, 256)

    # conv3_x
    builder.add_bottleneck(256, 128, 512, down_sampling=True)
    for _ in range(1, 4):
        builder.add_bottleneck(512, 128, 512)

    # conv4_x
    builder.add_bottleneck(512, 256, 1024, down_sampling=True)
    for _ in range(1, 6):
        builder.add_bottleneck(1024, 256, 1024)

    # conv5_x
    builder.add_bottleneck(1024, 512, 2048, down_sampling=True)
    builder.add_bottleneck(2048, 512, 2048)
    builder.add_bottleneck(2048, 512, 2048)

    # Final layers
    final_avg = brew.average_pool(
        model,
        builder.prev_blob,
        'final_avg',
        kernel=final_avg_kernel,
        stride=1,
        global_pooling=True,
    )

    # Final dimension of the "image" is reduced to 7x7
    last_out = brew.fc(model, final_avg, 'last_out_L{}'.format(num_labels),
                       2048, num_labels)

    if no_loss:
        return last_out

    # If we create model for training, use softmax-with-loss
    if (label is not None):
        (softmax, loss) = model.SoftmaxWithLoss(
            [last_out, label],
            ["softmax", "loss"],
        )

        return (softmax, loss)
    else:
        # For inference, we just return softmax
        return brew.softmax(model, last_out, "softmax")
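
# Usage sketch (assumptions: ResNetBuilder from the surrounding module is in
# scope, and the input is 224x224 so the 7x7 final average pool lines up).
from caffe2.python import model_helper, workspace
import numpy as np

model = model_helper.ModelHelper(name="resnet50_example",
                                 arg_scope={"order": "NCHW"})
create_resnet50(model, "data", num_input_channels=3, num_labels=1000,
                is_test=True)
workspace.FeedBlob("data", np.random.rand(1, 3, 224, 224).astype(np.float32))
workspace.RunNetOnce(model.param_init_net)
workspace.RunNetOnce(model.net)
print(workspace.FetchBlob("softmax").shape)  # expected: (1, 1000)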
Example No. 44
    def test_cpu2gpu_gpu2cpu_gradients(self):
        model = model_helper.ModelHelper(name="copy_test")

        batch = 32
        cpu_opt = core.DeviceOption(caffe2_pb2.CPU, 0)
        gpu_opt = core.DeviceOption(workspace.GpuDeviceType, 0)

        with core.NameScope("cpu"):
            with core.DeviceScope(cpu_opt):
                x_cpu = brew.fc(model, 'data', 'x_cpu', 16, 8)

        with core.NameScope("gpu_0"):
            with core.DeviceScope(gpu_opt):
                x_gpu = model.CopyCPUToGPU(x_cpu, "x_gpu")
                pred_gpu = brew.fc(model, x_gpu, "pred_gpu", 8, 4)
                pred_cpu = model.CopyGPUToCPU(pred_gpu, "pred_cpu")

        with core.DeviceScope(cpu_opt):
            with core.NameScope("cpu"):
                (softmax, loss) = model.SoftmaxWithLoss(
                    [pred_cpu, "label"],
                    ["softmax", "loss"],
                )

        gradient_map = model.AddGradientOperators([loss])

        # Add param updates (for cpu and gpu)
        init_net = model.param_init_net
        with core.DeviceScope(cpu_opt):
            with core.NameScope("cpu"):
                ONE = init_net.ConstantFill([], "ONE", shape=[1], value=1.)
                LR = init_net.ConstantFill([], "LR", shape=[1], value=-2.0)
                for param in model.GetParams():
                    model.WeightedSum(
                        [param, ONE, gradient_map[param], LR],
                        param,
                    )

        with core.NameScope("gpu_0"):
            with core.DeviceScope(gpu_opt):
                ONE = init_net.ConstantFill([], "ONE", shape=[1], value=1.)
                LR = init_net.ConstantFill([], "LR", shape=[1], value=-2.0)
                for param in model.GetParams():
                    model.WeightedSum(
                        [param, ONE, gradient_map[param], LR],
                        param,
                    )

        with core.DeviceScope(cpu_opt):
            workspace.FeedBlob(
                'cpu/data',
                np.random.rand(batch, 16).astype(np.float32),
            )
            workspace.FeedBlob(
                'cpu/label',
                np.random.randint(4, size=batch).astype(np.int32),
            )

        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net)

        initial_params = {p: workspace.FetchBlob(p) for p in model.GetParams()}
        workspace.RunNet(model.net.Proto().name)
        updated_params = {p: workspace.FetchBlob(p) for p in model.GetParams()}

        for p in model.GetParams():
            g = gradient_map[p]
            expected = initial_params[p] - 2.0 * workspace.FetchBlob(g)
            actual = updated_params[p]
            self.assertTrue(
                np.array_equal(expected, updated_params[p]),
                "Mismatch: {}: {}, {}".format(p, expected, actual),
            )
Example No. 45
def apply_dot_attention(
    model,
    encoder_output_dim,
    # [batch_size, encoder_output_dim, encoder_length]
    encoder_outputs_transposed,
    # [1, batch_size, decoder_state_dim]
    decoder_hidden_state_t,
    decoder_hidden_state_dim,
    scope,
    encoder_lengths=None,
):
    if decoder_hidden_state_dim != encoder_output_dim:
        weighted_decoder_hidden_state = brew.fc(
            model,
            decoder_hidden_state_t,
            s(scope, 'weighted_decoder_hidden_state'),
            dim_in=decoder_hidden_state_dim,
            dim_out=encoder_output_dim,
            axis=2,
        )
    else:
        weighted_decoder_hidden_state = decoder_hidden_state_t

    # [batch_size, decoder_state_dim]
    squeezed_weighted_decoder_hidden_state = model.net.Squeeze(
        weighted_decoder_hidden_state,
        s(scope, 'squeezed_weighted_decoder_hidden_state'),
        dims=[0],
    )

    # [batch_size, decoder_state_dim, 1]
    expanddims_squeezed_weighted_decoder_hidden_state = model.net.ExpandDims(
        squeezed_weighted_decoder_hidden_state,
        s(scope, 'expanddims_squeezed_weighted_decoder_hidden_state'),
        dims=[2],
    )

    # [batch_size, encoder_length, encoder_output_dim]
    encoder_outputs_for_dot_product = model.net.Transpose(
        encoder_outputs_transposed,
        s(scope, 'encoder_outputs_for_dot_product'),
        axes=[0, 2, 1],
    )

    # [batch_size, encoder_output_dim, 1]
    attention_logits_transposed = model.net.BatchMatMul(
        [
            encoder_outputs_for_dot_product,
            expanddims_squeezed_weighted_decoder_hidden_state,
        ],
        s(scope, 'attention_logits'),
    )

    # [batch_size, encoder_length, 1]
    attention_weights_3d = _calc_attention_weights(
        model=model,
        attention_logits_transposed=attention_logits_transposed,
        scope=scope,
        encoder_lengths=encoder_lengths,
    )

    # [batch_size, encoder_output_dim, 1]
    attention_weighted_encoder_context = _calc_weighted_context(
        model=model,
        encoder_outputs_transposed=encoder_outputs_transposed,
        encoder_output_dim=encoder_output_dim,
        attention_weights_3d=attention_weights_3d,
        scope=scope,
    )
    return attention_weighted_encoder_context, attention_weights_3d, []
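
# The blob plumbing above is plain dot-product attention. A compact numpy
# equivalent for a single decoder step (illustrative, unbatched, no length
# masking):
import numpy as np

def dot_attention(encoder_outputs, decoder_state):
    # encoder_outputs: [encoder_length, dim]; decoder_state: [dim]
    logits = encoder_outputs @ decoder_state      # one score per position
    weights = np.exp(logits - logits.max())
    weights /= weights.sum()                      # softmax over positions
    context = encoder_outputs.T @ weights         # weighted sum of encodings
    return context, weights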
Example No. 46
def alexnet():
    model = ModelHelper(name="r", arg_scope={"order": "NCHW", "is_test": True})
    conv1 = brew.conv(model,
                      "data",
                      "conv1",
                      3,
                      64,
                      11, ('XavierFill', {}), ('ConstantFill', {}),
                      stride=4,
                      pad=2)
    relu1 = brew.relu(model, conv1, "conv1")
    pool1 = brew.max_pool(model,
                          relu1,
                          "pool1",
                          kernel=3,
                          stride=2,
                          pad=0,
                          legacy_pad=3)
    lrn1 = brew.lrn(model,
                    pool1,
                    "pool1_lrn",
                    size=5,
                    alpha=1.0e-4,
                    beta=0.75,
                    bias=1.0)
    conv2 = brew.conv(model,
                      lrn1,
                      "conv2",
                      64,
                      192,
                      5, ('XavierFill', {}), ('ConstantFill', {}),
                      pad=2)
    relu2 = brew.relu(model, conv2, "conv2")
    pool2 = brew.max_pool(model, relu2, "pool2", kernel=3, stride=2)
    lrn2 = brew.lrn(model,
                    pool2,
                    "pool2_lrn",
                    size=5,
                    alpha=1.0e-4,
                    beta=0.75,
                    bias=1.0)
    conv3 = brew.conv(model,
                      lrn2,
                      "conv3",
                      192,
                      384,
                      3, ('XavierFill', {}), ('ConstantFill', {}),
                      pad=1)
    relu3 = brew.relu(model, conv3, "conv3")
    conv4 = brew.conv(model,
                      relu3,
                      "conv4",
                      384,
                      256,
                      3, ('XavierFill', {}), ('ConstantFill', {}),
                      pad=1)
    relu4 = brew.relu(model, conv4, "conv4")
    conv5 = brew.conv(model,
                      relu4,
                      "conv5",
                      256,
                      256,
                      3, ('XavierFill', {}), ('ConstantFill', {}),
                      pad=1)
    relu5 = brew.relu(model, conv5, "conv5")
    pool5 = brew.max_pool(model, relu5, "pool5", kernel=3, stride=2)
    fc6 = brew.fc(model, pool5, "fc6", 256 * 6 * 6, 4096, ('XavierFill', {}),
                  ('ConstantFill', {}))
    relu6 = brew.relu(model, fc6, "fc6")
    fc7 = brew.fc(model, relu6, "fc7", 4096, 4096, ('XavierFill', {}),
                  ('ConstantFill', {}))
    relu7 = brew.relu(model, fc7, "fc7")
    drop7 = brew.dropout(model, relu7, "fc7_dropout", is_test=1, ratio=0.5)
    fc8 = brew.fc(model, drop7, "fc8", 4096, 1000, ('XavierFill', {}),
                  ('ConstantFill', {}))
    relu8 = brew.relu(model, fc8, "fc8")
    brew.dropout(model, relu8, "fc8_dropout", is_test=1, ratio=0.5)
    return model, [(1, 3, 224, 224)]
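
# Usage sketch: the function returns the model together with its expected input
# shapes, which can drive blob creation directly (random data, inference only).
from caffe2.python import workspace
import numpy as np

model, shapes = alexnet()
workspace.FeedBlob("data", np.random.rand(*shapes[0]).astype(np.float32))
workspace.RunNetOnce(model.param_init_net)
workspace.RunNetOnce(model.net)
print(workspace.FetchBlob("fc8_dropout").shape)  # expected: (1, 1000)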
Example No. 47
 def test_fc_external_initializer(self):
     model = model_helper.ModelHelper(name="test", init_params=False)
     data = model.net.AddExternalInput("data")
     fc1 = brew.fc(model, data, "fc1", dim_in=1, dim_out=1)  # noqa
     self.assertEqual(len(model.net.Proto().op), 1)
     self.assertEqual(len(model.param_init_net.Proto().op), 0)
Example No. 48
    def test_multiple_optimizers(self):
        from caffe2.python import brew, core, optimizer
        from caffe2.python.model_helper import ModelHelper

        model = ModelHelper(name="test")
        fc1 = brew.fc(model, 'data', 'fc1', 100, 50)
        fc2 = brew.fc(model, fc1, 'fc2', 50, 25)
        pred = brew.fc(model, fc2, 'fc3', 25, 10)
        (softmax, loss) = model.SoftmaxWithLoss(
            [pred, 'label'],
            ['softmax', 'loss'],
        )
        model.AddGradientOperators([loss])

        param_to_device = optimizer._get_param_to_device(model)

        def infer_blob_device(blob_name):
            return optimizer.get_param_device(blob_name,
                                              "{}_grad".format(blob_name),
                                              param_to_device)

        sgd_1 = optimizer.SgdOptimizer(base_learning_rate=0.1)
        sgd_2 = optimizer.SgdOptimizer(base_learning_rate=0.2)
        adagrad = optimizer.AdagradOptimizer()

        # Check that the same optimizer instance shares one learning-rate blob.
        with core.DeviceScope(infer_blob_device("fc1_w")):
            sgd_1(model.net, model.param_init_net, "fc1_w", "fc1_w_grad")
        with core.DeviceScope(infer_blob_device("fc1_b")):
            sgd_1(model.net, model.param_init_net, "fc1_b", "fc1_b_grad")
        fc1_lr_blobs = []
        for op in model.net.Proto().op:
            if op.type == 'WeightedSum' and \
                    op.input[0] in ('fc1_w', 'fc1_b'):
                fc1_lr_blobs.append(op.input[3])
        self.assertEqual(fc1_lr_blobs[0], fc1_lr_blobs[1])

        # Check that a different instance of the same optimizer gets its own lr.
        with core.DeviceScope(infer_blob_device("fc2_w")):
            sgd_2(model.net, model.param_init_net, "fc2_w", "fc2_w_grad")
        with core.DeviceScope(infer_blob_device("fc2_b")):
            sgd_2(model.net, model.param_init_net, "fc2_b", "fc2_b_grad")
        fc2_lr_blobs = []
        for op in model.net.Proto().op:
            if op.type == 'WeightedSum' and \
                    op.input[0] in ('fc2_w', 'fc2_b'):
                self.assertTrue(op.input[3] not in fc1_lr_blobs)
                fc2_lr_blobs.append(op.input[3])
        self.assertEqual(fc2_lr_blobs[0], fc2_lr_blobs[1])

        # Check different optimizer type case
        with core.DeviceScope(infer_blob_device("fc3_w")):
            adagrad(model.net, model.param_init_net, "fc3_w", "fc3_w_grad")
        with core.DeviceScope(infer_blob_device("fc3_b")):
            adagrad(model.net, model.param_init_net, "fc3_b", "fc3_b_grad")
        fc3_lr_blobs = []
        for op in model.net.Proto().op:
            if op.type == 'Adagrad' and \
                    op.input[0] in ('fc3_w', 'fc3_b'):
                self.assertTrue(op.input[3] not in fc2_lr_blobs)
                self.assertTrue(op.input[3] not in fc1_lr_blobs)
                fc3_lr_blobs.append(op.input[3])
        self.assertEqual(fc3_lr_blobs[0], fc3_lr_blobs[1])
Example No. 49
def double_matmul():
    model = ModelHelper(name="r")
    fc0 = brew.fc(model, "data", "fc0", 10, 10)
    fc1 = brew.fc(model, fc0, "fc1", 10, 10)
    model.Proto().external_output[:] = [str(fc0), str(fc1)]
    return model, [(1, 10)]
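
# Usage sketch: because external_output lists both FC blobs, a single run
# exposes both intermediate results.
from caffe2.python import workspace
import numpy as np

model, shapes = double_matmul()
workspace.FeedBlob("data", np.random.rand(*shapes[0]).astype(np.float32))
workspace.RunNetOnce(model.param_init_net)
workspace.RunNetOnce(model.net)
print(workspace.FetchBlob("fc0").shape, workspace.FetchBlob("fc1").shape)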
Example No. 50
    def test_simple_model(self):
        model = model_helper.ModelHelper(name="mnist")
        # NOTE: these blob shapes do not match the network below (conv1 expects
        # one input channel), but the test only builds the graph, so the
        # mismatch is never exercised.
        workspace.FeedBlob("data",
                           np.random.randn(1, 3, 64, 64).astype(np.float32))
        workspace.FeedBlob("label", np.random.randn(1, 1000).astype(np.int))

        with core.NameScope("conv1"):
            conv1 = brew.conv(model,
                              "data",
                              'conv1',
                              dim_in=1,
                              dim_out=20,
                              kernel=5)
            # Image size: 24 x 24 -> 12 x 12
            pool1 = brew.max_pool(model, conv1, 'pool1', kernel=2, stride=2)
            # Image size: 12 x 12 -> 8 x 8
            conv2 = brew.conv(model,
                              pool1,
                              'conv2',
                              dim_in=20,
                              dim_out=100,
                              kernel=5)
            # Image size: 8 x 8 -> 4 x 4
            pool2 = brew.max_pool(model, conv2, 'pool2', kernel=2, stride=2)
        with core.NameScope("classifier"):
            # 100 * 4 * 4: dim_out of the previous layer times the pooled 4 x 4 image size
            fc3 = brew.fc(model, pool2, 'fc3', dim_in=100 * 4 * 4, dim_out=500)
            relu = brew.relu(model, fc3, fc3)
            pred = brew.fc(model, relu, 'pred', 500, 10)
            softmax = brew.softmax(model, pred, 'softmax')
            xent = model.LabelCrossEntropy([softmax, "label"], 'xent')
            # compute the expected loss
            loss = model.AveragedLoss(xent, "loss")
        model.net.RunAllOnMKL()
        model.param_init_net.RunAllOnMKL()
        model.AddGradientOperators([loss], skip=1)
        blob_name_tracker = {}
        graph = tb.model_to_graph_def(
            model,
            blob_name_tracker=blob_name_tracker,
            shapes={},
            show_simplified=False,
        )
        #self.assertEqual(
        #    blob_name_tracker['GRADIENTS/conv1/conv1_b_grad'],
        #    'conv1/conv1_b_grad',
        #)
        self.maxDiff = None
        # We can't guarantee the order in which they appear, so we sort
        # both before we compare them
        with open('tests/expect/caffe_mnist.expect') as f:
            EXPECTED_MNIST = f.read()
        sep = "node {"
        expected = "\n".join(
            sorted(sep + "\n  " + part.strip()
                   for part in EXPECTED_MNIST.strip().split(sep)
                   if part.strip()))
        actual = "\n".join(
            sorted(sep + "\n  " + part.strip()
                   for part in str(graph).strip().split(sep) if part.strip()))
Example No. 51
    def _apply(
        self,
        model,
        input_t,
        seq_lengths,
        states,
        timestep,
        extra_inputs=None,
    ):
        hidden_t_prev, cell_t_prev = states

        fc_input = hidden_t_prev
        fc_input_dim = self.hidden_size

        if extra_inputs is not None:
            extra_input_blobs, extra_input_sizes = zip(*extra_inputs)
            fc_input, _ = model.net.Concat(
                [hidden_t_prev] + list(extra_input_blobs),
                [
                    self.scope('gates_concatenated_input_t'),
                    self.scope('_gates_concatenated_input_t_concat_dims'),
                ],
                axis=2,
            )
            fc_input_dim += sum(extra_input_sizes)

        prev_t = brew.fc(
            model,
            fc_input,
            self.scope('prev_t'),
            dim_in=fc_input_dim,
            dim_out=4 * self.hidden_size,
            axis=2,
        )

        # defining MI parameters
        alpha = model.param_init_net.ConstantFill(
            [],
            [self.scope('alpha')],
            shape=[4 * self.hidden_size],
            value=1.0,
        )
        beta_h = model.param_init_net.ConstantFill(
            [],
            [self.scope('beta1')],
            shape=[4 * self.hidden_size],
            value=1.0,
        )
        beta_i = model.param_init_net.ConstantFill(
            [],
            [self.scope('beta2')],
            shape=[4 * self.hidden_size],
            value=1.0,
        )
        b = model.param_init_net.ConstantFill(
            [],
            [self.scope('b')],
            shape=[4 * self.hidden_size],
            value=0.0,
        )
        model.params.extend([alpha, beta_h, beta_i, b])

        # alpha * input_t + beta_h
        # Shape: [1, batch_size, 4 * hidden_size]
        alpha_by_input_t_plus_beta_h = model.net.ElementwiseLinear(
            [input_t, alpha, beta_h],
            self.scope('alpha_by_input_t_plus_beta_h'),
            axis=2,
        )
        # (alpha * input_t + beta_h) * prev_t =
        # alpha * input_t * prev_t + beta_h * prev_t
        # Shape: [1, batch_size, 4 * hidden_size]
        alpha_by_input_t_plus_beta_h_by_prev_t = model.net.Mul(
            [alpha_by_input_t_plus_beta_h, prev_t],
            self.scope('alpha_by_input_t_plus_beta_h_by_prev_t')
        )
        # beta_i * input_t + b
        # Shape: [1, batch_size, 4 * hidden_size]
        beta_i_by_input_t_plus_b = model.net.ElementwiseLinear(
            [input_t, beta_i, b],
            self.scope('beta_i_by_input_t_plus_b'),
            axis=2,
        )
        # alpha * input_t * prev_t + beta_h * prev_t + beta_i * input_t + b
        # Shape: [1, batch_size, 4 * hidden_size]
        gates_t = model.net.Sum(
            [alpha_by_input_t_plus_beta_h_by_prev_t, beta_i_by_input_t_plus_b],
            self.scope('gates_t')
        )
        hidden_t, cell_t = model.net.LSTMUnit(
            [hidden_t_prev, cell_t_prev, gates_t, seq_lengths, timestep],
            [self.scope('hidden_t_intermediate'), self.scope('cell_t')],
            forget_bias=self.forget_bias,
            drop_states=self.drop_states,
        )
        model.net.AddExternalOutputs(
            cell_t,
            hidden_t,
        )
        if self.memory_optimization:
            self.recompute_blobs = [gates_t]
        return hidden_t, cell_t
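
# In numpy terms, the Multiplicative Integration pre-activation assembled above
# (with Wx = input_t, the externally projected input, and Uh = prev_t) is:
#
#     gates = alpha * Wx * Uh + beta_h * Uh + beta_i * Wx + b
#
# A minimal illustrative check, with n standing in for 4 * hidden_size:
import numpy as np

n = 8
Wx, Uh = np.random.rand(n), np.random.rand(n)
alpha, beta_h, beta_i, b = np.ones(n), np.ones(n), np.ones(n), np.zeros(n)
gates = alpha * Wx * Uh + beta_h * Uh + beta_i * Wx + b
# With the all-ones/zeros initialization above this reduces to
# (Wx + 1) * (Uh + 1) - 1, i.e. an ordinary additive gate plus a
# multiplicative interaction term.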
Example No. 52
 def test_registry_invalid(self, input_dim, output_dim, batch_size):
     m = model_helper.ModelHelper()
     brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
     with self.assertRaises(RuntimeError):
         workspace.ApplyTransform("definitely_not_a_real_transform",
                                  m.net.Proto())
Example No. 53
 def _MiniAlexNetNoDropout(self, order):
     # First, AlexNet using the cnn wrapper.
     model = model_helper.ModelHelper(name="alexnet")
     conv1 = brew.conv(model,
                       "data",
                       "conv1",
                       3,
                       16,
                       11, ("XavierFill", {}), ("ConstantFill", {}),
                       stride=4,
                       pad=0)
     relu1 = brew.relu(model, conv1, "relu1")
     norm1 = brew.lrn(model,
                      relu1,
                      "norm1",
                      size=5,
                      alpha=0.0001,
                      beta=0.75)
     pool1 = brew.max_pool(model, norm1, "pool1", kernel=3, stride=2)
     conv2 = brew.group_conv(model,
                             pool1,
                             "conv2",
                             16,
                             32,
                             5, ("XavierFill", {}), ("ConstantFill", {
                                 "value": 0.1
                             }),
                             group=2,
                             stride=1,
                             pad=2)
     relu2 = brew.relu(model, conv2, "relu2")
     norm2 = brew.lrn(model,
                      relu2,
                      "norm2",
                      size=5,
                      alpha=0.0001,
                      beta=0.75)
     pool2 = brew.max_pool(model, norm2, "pool2", kernel=3, stride=2)
     conv3 = brew.conv(model,
                       pool2,
                       "conv3",
                       32,
                       64,
                       3, ("XavierFill", {
                           'std': 0.01
                       }), ("ConstantFill", {}),
                       pad=1)
     relu3 = brew.relu(model, conv3, "relu3")
     conv4 = brew.group_conv(model,
                             relu3,
                             "conv4",
                             64,
                             64,
                             3, ("XavierFill", {}), ("ConstantFill", {
                                 "value": 0.1
                             }),
                             group=2,
                             pad=1)
     relu4 = brew.relu(model, conv4, "relu4")
     conv5 = brew.group_conv(model,
                             relu4,
                             "conv5",
                             64,
                             32,
                             3, ("XavierFill", {}), ("ConstantFill", {
                                 "value": 0.1
                             }),
                             group=2,
                             pad=1)
     relu5 = brew.relu(model, conv5, "relu5")
     pool5 = brew.max_pool(model, relu5, "pool5", kernel=3, stride=2)
     fc6 = brew.fc(model, pool5, "fc6", 1152, 1024, ("XavierFill", {}),
                   ("ConstantFill", {
                       "value": 0.1
                   }))
     relu6 = brew.relu(model, fc6, "relu6")
     fc7 = brew.fc(model, relu6, "fc7", 1024, 1024, ("XavierFill", {}),
                   ("ConstantFill", {
                       "value": 0.1
                   }))
     relu7 = brew.relu(model, fc7, "relu7")
     fc8 = brew.fc(model, relu7, "fc8", 1024, 5, ("XavierFill", {}),
                   ("ConstantFill", {
                       "value": 0.0
                   }))
     pred = brew.softmax(model, fc8, "pred")
     xent = model.LabelCrossEntropy([pred, "label"], "xent")
     loss = model.AveragedLoss([xent], ["loss"])
     model.AddGradientOperators([loss])
     return model
Example No. 54
    def test_gradient_optim(self, input_dim, output_dim, batch_size):
        m = model_helper.ModelHelper()
        with core.NameScope("name_x"):
            fc1 = brew.fc(m,
                          "data",
                          "fc1",
                          dim_in=input_dim,
                          dim_out=output_dim)
            fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
            fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
            fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
            fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)
            fc5.Relu([], fc5)\
               .Softmax([], "pred") \
               .LabelCrossEntropy(["label"], ["xent"]) \
               .AveragedLoss([], "loss")
        input_to_grad = m.AddGradientOperators(["name_x/loss"])

        blobs_before = count_blobs(m.net.Proto())
        optim_proto = memonger.share_grad_blobs(
            m.net,
            ["name_x/loss"],
            set(viewvalues(m.param_to_grad)),
            "name_x/",
            share_activations=False,
        )
        blobs_after = count_blobs(optim_proto)
        self.assertLess(blobs_after, blobs_before)

        optim_proto_wacts = memonger.share_grad_blobs(
            m.net,
            ["name_x/loss"],
            set(viewvalues(m.param_to_grad)),
            "name_x/",
            share_activations=True,
            dont_share_blobs=set([str(input_to_grad["name_x/fc1_w"])]),
        )
        blobs_wact_optim = count_blobs(optim_proto_wacts)
        self.assertLessEqual(blobs_wact_optim, blobs_after)

        # Check that the last activations are not shared
        self.assertTrue(has_blob(optim_proto, "name_x/fc5"))
        self.assertTrue(
            has_blob(optim_proto_wacts, "name_x/fc5"),
            "Dont remap final activation",
        )

        # Test networks produce exactly same gradients
        data = np.random.randn(batch_size, input_dim).astype(np.float32)
        label = np.random.randint(low=0, high=output_dim,
                                  size=(batch_size, )).astype(np.int32)
        workspace.RunNetOnce(m.param_init_net)
        workspace.FeedBlob("name_x/data", data)
        workspace.FeedBlob("name_x/label", label)
        workspace.RunNetOnce(m.net)
        loss = workspace.FetchBlob("name_x/loss")
        grad = workspace.FetchBlob(str(input_to_grad["name_x/fc1_w"]))
        workspace.RunNetOnce(optim_proto)
        optimized_loss = workspace.FetchBlob("name_x/loss")
        optimized_grad = workspace.FetchBlob(str(
            input_to_grad["name_x/fc1_w"]))
        np.testing.assert_almost_equal(loss, optimized_loss)
        np.testing.assert_almost_equal(grad, optimized_grad)

        workspace.FeedBlob(str(input_to_grad["name_x/fc1_w"]), np.array([0.0]))

        # Run with the forward optimization
        workspace.RunNetOnce(optim_proto_wacts)
        optimized_loss = workspace.FetchBlob("name_x/loss")
        optimized_grad = workspace.FetchBlob(str(
            input_to_grad["name_x/fc1_w"]))
        np.testing.assert_almost_equal(loss, optimized_loss)
        np.testing.assert_almost_equal(grad, optimized_grad)
Example No. 55
def build_mnist_lenet(model, input_blob_name):
    """Build the LeNet network for MNIST."""

    # Convolution layer that operates on the input MNIST image
    # Input is grayscale image of size 28x28 pixels
    # After convolution by 20 kernels each of size 5x5,
    # output is 20 channels, each of size 24x24
    layer_1_input_dims = 1  # Input to layer is grayscale, so 1 channel
    layer_1_output_dims = 20  # Output from this layer has 20 channels
    layer_1_kernel_dims = 5  # Each kernel is of size 1x5x5
    layer_1_conv = brew.conv(
        model,
        input_blob_name,
        "layer_1_conv",
        dim_in=layer_1_input_dims,
        dim_out=layer_1_output_dims,
        kernel=layer_1_kernel_dims,
    )

    # Max-pooling layer that operates on output from previous convolution layer
    # Input is 20 channels, each of size 24x24
    # After pooling by 2x2 windows and stride of 2, the output of this layer
    # is 20 channels, each of size 12x12
    layer_2_kernel_dims = 2  # Max-pool over 2x2 windows
    layer_2_stride = 2  # Stride by 2 pixels between each pool
    layer_2_pool = brew.max_pool(
        model,
        layer_1_conv,
        "layer_2_pool",
        kernel=layer_2_kernel_dims,
        stride=layer_2_stride,
    )

    # Convolution layer that operates on output from previous pooling layer.
    # Input is 20 channels, each of size 12x12
    # After convolution by 50 kernels, each of size 20x5x5,
    # the output is 50 channels, each of size 8x8
    layer_3_input_dims = 20  # Number of input channels
    layer_3_output_dims = 50  # Number of output channels
    layer_3_kernel_dims = 5  # Each kernel is of size 20x5x5
    layer_3_conv = brew.conv(
        model,
        layer_2_pool,
        "layer_3_conv",
        dim_in=layer_3_input_dims,
        dim_out=layer_3_output_dims,
        kernel=layer_3_kernel_dims,
    )

    # Max-pooling layer that operates on output from previous convolution layer
    # Input is 50 channels, each of size 8x8
    # Apply pooling by 2x2 windows and stride of 2
    # Output is 50 channels, each of size 4x4
    layer_4_kernel_dims = 2  # Max-pool over 2x2 windows
    layer_4_stride = 2  # Stride by 2 pixels between each pool
    layer_4_pool = brew.max_pool(
        model,
        layer_3_conv,
        "layer_4_pool",
        kernel=layer_4_kernel_dims,
        stride=layer_4_stride,
    )

    # Fully-connected layer that operates on output from previous pooling layer
    # Input is 50 channels, each of size 4x4
    # Output is vector of size 500
    layer_5_input_dims = 50 * 4 * 4
    layer_5_output_dims = 500
    layer_5_fc = brew.fc(
        model,
        layer_4_pool,
        "layer_5_fc",
        dim_in=layer_5_input_dims,
        dim_out=layer_5_output_dims,
    )

    # ReLU layer that operates on output from previous fully-connected layer
    # Input and output are both of size 500
    layer_6_relu = brew.relu(
        model,
        layer_5_fc,
        "layer_6_relu",
    )

    # Fully-connected layer that operates on output from previous ReLU layer
    # Input is of size 500
    # Output is of size 10, the number of classes in MNIST dataset
    layer_7_input_dims = 500
    layer_7_output_dims = 10
    layer_7_fc = brew.fc(
        model,
        layer_6_relu,
        "layer_7_fc",
        dim_in=layer_7_input_dims,
        dim_out=layer_7_output_dims,
    )

    # Softmax layer that operates on output from previous fully-connected layer
    # Input and output are both of size 10
    # Each output (0 to 9) is a probability score on that digit
    layer_8_softmax = brew.softmax(
        model,
        layer_7_fc,
        "softmax",
    )

    return layer_8_softmax
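
# Usage sketch: wiring the builder to a random MNIST-shaped blob
# (batch x 1 x 28 x 28) and running a single forward pass.
from caffe2.python import model_helper, workspace
import numpy as np

model = model_helper.ModelHelper(name="mnist_lenet_example")
build_mnist_lenet(model, "data")
workspace.FeedBlob("data", np.random.rand(1, 1, 28, 28).astype(np.float32))
workspace.RunNetOnce(model.param_init_net)
workspace.RunNetOnce(model.net)
print(workspace.FetchBlob("softmax"))  # 10 scores summing to ~1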
Example No. 56
    def test_gradient_optim_tree(self, input_dim, output_dim, batch_size):
        m = model_helper.ModelHelper()
        with core.NameScope("name_x"):
            fc1 = brew.fc(m,
                          "data",
                          "fc1",
                          dim_in=input_dim,
                          dim_out=output_dim)
            fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
            fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
            fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
            fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)
            fc5.Relu([], fc5) \
               .Softmax([], "pred1") \
               .LabelCrossEntropy(["label"], ["xent1"]) \
               .AveragedLoss([], "loss1")
            fc6 = brew.fc(m, fc5, "fc6", dim_in=output_dim, dim_out=output_dim)
            fc6.Relu([], fc6) \
               .Softmax([], "pred2") \
               .LabelCrossEntropy(["label"], ["xent2"]) \
               .AveragedLoss([], "loss2")
        input_to_grad = m.AddGradientOperators(
            ["name_x/loss1", "name_x/loss2"])

        blobs_before = count_blobs(m.net.Proto())
        optim_proto = memonger.share_grad_blobs(
            m.net,
            ["name_x/loss1", "name_x/loss2"],
            set(viewvalues(m.param_to_grad)),
            "name_x",  # "name_x//shared_gradinp_0_shared" if using "name_x/"
            share_activations=True,
            dont_share_blobs=set([
                'name_x/fc6', 'name_x/fc5',
                str(input_to_grad["name_x/fc1_w"])
            ]),
        )
        blobs_after = count_blobs(optim_proto)
        self.assertLess(blobs_after, blobs_before)
        self.assertTrue(has_blob(optim_proto, "name_x/fc6"))

        # Test networks produce exactly same gradients
        data = np.random.randn(batch_size, input_dim).astype(np.float32)
        label = np.random.randint(low=0, high=output_dim,
                                  size=(batch_size, )).astype(np.int32)
        workspace.RunNetOnce(m.param_init_net)
        workspace.FeedBlob("name_x/data", data)
        workspace.FeedBlob("name_x/label", label)
        workspace.RunNetOnce(m.net)
        loss1 = workspace.FetchBlob("name_x/loss1")
        loss2 = workspace.FetchBlob("name_x/loss2")
        grad = workspace.FetchBlob(str(input_to_grad["name_x/fc1_w"]))
        workspace.FeedBlob(str(input_to_grad["name_x/fc1_w"]), np.array([0.0]))

        workspace.RunNetOnce(optim_proto)
        optimized_loss1 = workspace.FetchBlob("name_x/loss1")
        optimized_loss2 = workspace.FetchBlob("name_x/loss2")
        optimized_grad = workspace.FetchBlob(str(
            input_to_grad["name_x/fc1_w"]))
        np.testing.assert_almost_equal(loss1, optimized_loss1)
        np.testing.assert_almost_equal(loss2, optimized_loss2)
        np.testing.assert_almost_equal(grad, optimized_grad)
Example No. 57
from caffe2.python import brew, workspace
from caffe2.python.model_helper import ModelHelper

import numpy as np

m, k, n = (1, 28 * 28, 10)  # [m][k] * [k][n] = [m][n]
x = np.random.rand(m, k).astype(np.float32) - 0.5  # x = m*k 2D tensor

workspace.ResetWorkspace()  # clear workspace
workspace.FeedBlob("x", x)  # feed x as a blob
model = ModelHelper(name="test_model")  # create model

print(model.Proto())  # show the net protobuf before any operators are added
brew.fc(
    model, "x", "y", k, n
)  # fully connected layer (adds params y_w, y_b); output y is an m*n 2D tensor
brew.softmax(model, "y", "z")
model.Validate()
print(model.Proto())  # show the net protobuf after the operators are added

workspace.RunNetOnce(
    model.param_init_net)  # initialize y_w (weight) and y_b (bias) randomly
# weight is a 2D array, bias is a 1D array
print(workspace.Blobs())  # list the blobs now in the workspace
# workspace.FetchBlob("y_w")
# workspace.FetchBlob("y_b")

workspace.RunNetOnce(model.net)
y = workspace.FetchBlob("y")  # FC output, shape (m, n)
z = workspace.FetchBlob("z")  # softmax output, shape (m, n)
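
# Follow-up sketch: sanity-checking the outputs fetched above. Each softmax row
# should be a probability distribution over the n classes.
assert y.shape == (m, n)
assert np.allclose(z.sum(axis=1), 1.0, atol=1e-5)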
Example No. 58
    def test_forward_optim_tree_harder(self, input_dim, output_dim,
                                       batch_size):
        m = model_helper.ModelHelper()
        m.net.Proto().type = "dag"
        m.net.Proto().num_workers = 4
        m.net.AddExternalInput("label")
        m.net.AddExternalInput("data")

        with core.NameScope("name_x"):
            fc1 = brew.fc(m,
                          "data",
                          "fc1",
                          dim_in=input_dim,
                          dim_out=output_dim)
            fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)

            fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
            fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
            fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)

            # Branch
            fc3b = brew.fc(m,
                           fc2,
                           "fc3b",
                           dim_in=output_dim,
                           dim_out=output_dim)
            fc4b = brew.fc(m,
                           fc3b,
                           "fc4b",
                           dim_in=output_dim,
                           dim_out=output_dim)
            fc5b = brew.fc(m,
                           fc4b,
                           "fc5b",
                           dim_in=output_dim,
                           dim_out=output_dim)

            fc5sum = brew.sum(m, [fc5, fc5b], "fc5sum")
            fc5sum.Relu([], "relu1") \
               .Softmax([], "pred1") \
               .LabelCrossEntropy(["label"], ["xent1"]) \
               .AveragedLoss([], "loss1")
            fc6 = brew.fc(m, fc5, "fc6", dim_in=output_dim, dim_out=output_dim)
            fc6.Relu([], fc6) \
               .Softmax([], "pred2") \
               .LabelCrossEntropy(["label"], ["xent2"]) \
               .AveragedLoss([], "loss2")

        blobs_before = count_blobs(m.net.Proto())
        optim_proto = memonger.optimize_inference_for_dag(
            m.net, ["name_x/data"], "name_x/")

        blobs_after = count_blobs(optim_proto)

        # Extra test with when one of the parameters is also an input.
        # This caused a bug before.
        optim_proto_extra_input = memonger.optimize_inference_for_dag(
            m.net, ["name_x/data", "name_x/fc1_w"], "name_x/")
        blobs_after_extra_input = count_blobs(optim_proto_extra_input)
        self.assertEqual(blobs_after, blobs_after_extra_input)
        ###

        print(str(optim_proto))
        self.assertLess(blobs_after, blobs_before)

        # Test networks produce exactly same results
        data = np.random.randn(batch_size, input_dim).astype(np.float32)
        label = np.random.randint(low=0, high=output_dim,
                                  size=(batch_size, )).astype(np.int32)
        workspace.RunNetOnce(m.param_init_net)
        workspace.FeedBlob("name_x/data", data)
        workspace.FeedBlob("name_x/label", label)
        workspace.RunNetOnce(m.net)
        loss1 = workspace.FetchBlob("name_x/loss1")
        loss2 = workspace.FetchBlob("name_x/loss2")
        workspace.RunNetOnce(optim_proto)
        optimized_loss1 = workspace.FetchBlob("name_x/loss1")
        optimized_loss2 = workspace.FetchBlob("name_x/loss2")
        np.testing.assert_almost_equal(loss1, optimized_loss1)
        np.testing.assert_almost_equal(loss2, optimized_loss2)
Example No. 59
    def test_forward_optim_tree_daggy(self, input_dim, output_dim, batch_size):
        m = model_helper.ModelHelper()
        m.Proto().type = "dag"
        m.Proto().num_workers = 4

        with core.NameScope("name_x"):
            fc1 = brew.fc(m,
                          "data",
                          "fc1",
                          dim_in=input_dim,
                          dim_out=output_dim)
            fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)

            fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
            fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
            fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)

            # Branch
            fc3b = brew.fc(m,
                           fc2,
                           "fc3b",
                           dim_in=output_dim,
                           dim_out=output_dim)
            fc4b = brew.fc(m,
                           fc3b,
                           "fc4b",
                           dim_in=output_dim,
                           dim_out=output_dim)
            fc5b = brew.fc(m,
                           fc4b,
                           "fc5b",
                           dim_in=output_dim,
                           dim_out=output_dim)

            fc5sum = brew.sum(m, [fc5, fc5b], "fc5sum")

            fc5sum.Relu([], "relu1") \
               .Softmax([], "pred1") \
               .LabelCrossEntropy(["label"], ["xent1"]) \
               .AveragedLoss([], "loss1")
            fc6 = brew.fc(m, fc5, "fc6", dim_in=output_dim, dim_out=output_dim)
            fc6.Relu([], fc6) \
               .Softmax([], "pred2") \
               .LabelCrossEntropy(["label"], ["xent2"]) \
               .AveragedLoss([], "loss2")

        blobs_before = count_blobs(m.net.Proto())
        optim_proto = memonger.optimize_inference_for_dag(
            m.net, ["name_x/data"], "name_x")
        self.assertTrue(
            memonger.verify_graph_equality(m.net.Proto(), optim_proto))
        blobs_after = count_blobs(optim_proto)
        self.assertLess(blobs_after, blobs_before)

        # Test networks produce exactly same results
        data = np.random.randn(batch_size, input_dim).astype(np.float32)
        label = np.random.randint(low=0, high=output_dim,
                                  size=(batch_size, )).astype(np.int32)
        workspace.RunNetOnce(m.param_init_net)
        workspace.FeedBlob("name_x/data", data)
        workspace.FeedBlob("name_x/label", label)
        workspace.RunNetOnce(m.net)
        loss1 = workspace.FetchBlob("name_x/loss1")
        loss2 = workspace.FetchBlob("name_x/loss2")
        workspace.RunNetOnce(optim_proto)
        optimized_loss1 = workspace.FetchBlob("name_x/loss1")
        optimized_loss2 = workspace.FetchBlob("name_x/loss2")
        np.testing.assert_almost_equal(loss1, optimized_loss1)
        np.testing.assert_almost_equal(loss2, optimized_loss2)
Example No. 60
# NOTE: this example begins mid-function; the signature below is a plausible
# reconstruction and the name `sample_action` is hypothetical.
def sample_action(prediction):
    # Return 1 with probability `prediction`, else 0.
    if prediction < np.random.uniform(0, 1):
        return 0
    else:
        return 1


avg_t = np.array([])

# HIDDEN_SIZE is defined outside this excerpt; a small placeholder value is
# assumed here so the snippet can run standalone.
HIDDEN_SIZE = 10

input_data = np.random.rand(1, 4).astype(np.float32)
workspace.FeedBlob("input_data", input_data)

forward_model = model_helper.ModelHelper(name="forward")
forward_init_net = forward_model.param_init_net
forward_net = forward_model.net

brew.fc(forward_model, 'input_data', 'hidden', 4, HIDDEN_SIZE)
brew.relu(forward_model, 'hidden', 'hidden')
brew.fc(forward_model, 'hidden', 'prediction', HIDDEN_SIZE, 1)
forward_model.Sigmoid('prediction', 'prediction')

full_model = model_helper.ModelHelper(name="full")
full_init_net = full_model.param_init_net
full_net = full_model.net
loss = full_net.ConstantFill([], "loss", shape=[1], value=0.0)
ONE = full_net.ConstantFill([], "ONE", shape=[1], value=1.)

brew.fc(full_model, 'input_data', 'hidden', 4, HIDDEN_SIZE)
brew.relu(full_model, 'hidden', 'hidden')
brew.fc(full_model, 'hidden', 'prediction', HIDDEN_SIZE, 1)
full_model.Sigmoid('prediction', 'prediction')
gradient_map = full_net.AddGradientOperators(['loss'])