Example #1
        def model_build_fun(model, loss_scale):
            workspace.FeedBlob(
                core.ScopedBlobReference("seq_lengths"),
                np.array([self.T] * self.batch_per_device, dtype=np.int32)
            )
            model.param_init_net.ConstantFill(
                [],
                "hidden_init",
                value=0.0,
                shape=[1, self.batch_per_device, self.hidden_dim]
            )
            model.param_init_net.ConstantFill(
                [],
                "cell_init",
                value=0.0,
                shape=[1, self.batch_per_device, self.hidden_dim]
            )

            output, _last_hidden, _, _last_state = rnn_cell.LSTM(
                model=model,
                input_blob="data",
                seq_lengths="seq_lengths",
                initial_states=("hidden_init", "cell_init"),
                dim_in=self.input_dim,
                dim_out=self.hidden_dim,
                scope="partest",
            )

            # A silly loss function
            loss = model.AveragedLoss(
                model.Sub([output, "target"], "dist"),
                "loss",
            )
            loss = model.Scale(loss, "loss_scaled", scale=loss_scale)
            return [loss]
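A note on this snippet: the build function only wires operators into the model; the external blobs "data" and "target" that it references must be fed before the net runs. A minimal, hypothetical feeding step (shapes assumed from the code above: "data" is [T, batch_per_device, input_dim] and "target" matches the LSTM output [T, batch_per_device, hidden_dim]):

import numpy as np
from caffe2.python import workspace

# Illustrative sizes; the real values come from the surrounding test fixture.
T, batch_per_device, input_dim, hidden_dim = 5, 4, 10, 10
workspace.FeedBlob(
    "data",
    np.random.rand(T, batch_per_device, input_dim).astype(np.float32))
workspace.FeedBlob(
    "target",
    np.random.rand(T, batch_per_device, hidden_dim).astype(np.float32))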
Example #2
def create_model(args, queue, label_queue, input_shape):
    model = cnn.CNNModelHelper(name="LSTM_bench")
    seq_lengths, hidden_init, cell_init, target = \
        model.net.AddExternalInputs(
            'seq_lengths',
            'hidden_init',
            'cell_init',
            'target',
        )
    input_blob = model.DequeueBlobs(queue, "input_data")
    labels = model.DequeueBlobs(label_queue, "label")

    if args.implementation == "own":
        output, last_hidden, _, last_state = rnn_cell.LSTM(
            model=model,
            input_blob=input_blob,
            seq_lengths=seq_lengths,
            initial_states=(hidden_init, cell_init),
            dim_in=args.input_dim,
            dim_out=args.hidden_dim,
            scope="lstm1",
            memory_optimization=args.memory_optimization,
        )
    elif args.implementation == "cudnn":
        # We need to feed a placeholder input so that RecurrentInitOp
        # can infer the dimensions.
        model.param_init_net.ConstantFill([], input_blob, shape=input_shape)
        output, last_hidden, last_state = rnn_cell.cudnn_LSTM(
            model=model,
            input_blob=input_blob,
            initial_states=(hidden_init, cell_init),
            dim_in=args.input_dim,
            dim_out=args.hidden_dim,
            scope="cudnnlstm",
            num_layers=1,
        )

    else:
        assert False, "Unknown implementation"

    weights = model.UniformFill(labels, "weights")
    softmax, loss = model.SoftmaxWithLoss(
        [model.Flatten(output), labels, weights],
        ['softmax', 'loss'],
    )

    model.AddGradientOperators([loss])

    # carry states over
    model.net.Copy(last_hidden, hidden_init)
    model.net.Copy(last_state, cell_init)

    workspace.FeedBlob(
        hidden_init,
        np.zeros([1, args.batch_size, args.hidden_dim], dtype=np.float32))
    workspace.FeedBlob(
        cell_init,
        np.zeros([1, args.batch_size, args.hidden_dim], dtype=np.float32))
    return model, output
Example #3
    def _create_lstm(cls, init_model, pred_model, n, opset_version):
        assert init_model is not None, "cannot convert LSTMs without access to the full model"
        assert pred_model is not None, "cannot convert LSTMs without access to the full model"

        attrs = dict(n.attrs) # make a copy, which is safe to mutate
        hidden_size = attrs.pop('hidden_size')
        assert not attrs, "unsupported LSTM attributes: " + str(attrs.keys())

        input_blob, W, R, B, sequence_lens, initial_h, initial_c = n.inputs

        input_size = cls._rnn_shape_inference(init_model, pred_model, n, input_blob, W)
        if input_size is None:
            raise RuntimeError("best-effort shape inference for LSTM input failed")

        name = dummy_name()
        init_net = core.Net("init-net")
        pred_mh = ModelHelper()

        hidden_t_all, hidden_t_last, _, _, params = rnn_cell.LSTM(
            pred_mh,
            input_blob,
            sequence_lens,
            [initial_h, initial_c],
            input_size,
            hidden_size,
            name,
            forward_only=True,
            return_params=True
        )

        # input and recurrence biases are squashed together in onnx but not in caffe2
        Bi = name + "_bias_i2h"
        Br = name + "_bias_gates"
        init_net.Slice(B, Bi, starts=[0*hidden_size], ends=[4*hidden_size])
        init_net.Slice(B, Br, starts=[4*hidden_size], ends=[8*hidden_size])

        # caffe2 has a different order from onnx. We need to rearrange
        #   i o f c -> i f o c
        reforms = ((W,  params['input']    ['weights'], [(0, input_size)]),
                   (R,  params['recurrent']['weights'], [(0, hidden_size)]),
                   (Bi, params['input']    ['biases'],  []),
                   (Br, params['recurrent']['biases'],  []))
        for name_from, name_to, extra_dims in reforms:
            xi, xo, xf, xc = [name_from + suffix for suffix in ("_i", "_o", "_f", "_c")]
            for i, x in enumerate([xi, xo, xf, xc]):
                dim0 = i * hidden_size, (i+1) * hidden_size
                starts, ends = zip(dim0, *extra_dims)
                init_net.Slice(name_from, x, starts=starts, ends=ends)
            init_net.Concat([xi, xf, xo, xc], [name_to, dummy_name()], axis=0)

        pred_mh.net = pred_mh.net.Clone(
            "dummy-clone-net",
            blob_remap={ hidden_t_all: n.outputs[0], hidden_t_last: n.outputs[1] }
        )

        return Caffe2Ops(list(pred_mh.Proto().op),
                         list(init_net.Proto().op),
                         list(pred_mh.Proto().external_input))
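The Slice/Concat loop above implements the reordering described in the comment: ONNX stacks the four LSTM gate parameter blocks in i, o, f, c order, while caffe2 expects i, f, o, c. A standalone numpy sketch of the same idea (an illustration, not the converter code itself):

import numpy as np

def reorder_gates_iofc_to_ifoc(w, hidden_size):
    # w is [4 * hidden_size, ...], one hidden_size-tall block per gate.
    i, o, f, c = (w[k * hidden_size:(k + 1) * hidden_size] for k in range(4))
    return np.concatenate([i, f, o, c], axis=0)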
Example #4
    def test_rnn(self):
        from caffe2.python import rnn_cell
        T = 5
        model = model_helper.ModelHelper()
        seq_lengths, labels = \
            model.net.AddExternalInputs(
                'seq_lengths', 'labels',
            )
        init_blobs = []
        for i in range(2):
            hidden_init, cell_init = model.net.AddExternalInputs(
                "hidden_init_{}".format(i),
                "cell_init_{}".format(i)
            )
            init_blobs.extend([hidden_init, cell_init])
        model.param_init_net.ConstantFill([], ["input"], shape=[T, 4, 10])
        output, last_hidden, _, last_state = rnn_cell.LSTM(
            model=model,
            input_blob="input",
            seq_lengths=seq_lengths,
            initial_states=init_blobs,
            dim_in=10,
            dim_out=[10, 10],
            scope="lstm1",
            forward_only=False,
            drop_states=True,
            return_last_layer_only=True,
        )
        softmax, loss = model.net.SoftmaxWithLoss(
            [model.Flatten(output), "labels"],
            ['softmax', 'loss'],
        )

        model.AddGradientOperators([loss])
        blobs_before = count_blobs(model.net.Proto())
        optim_proto = memonger.share_grad_blobs(
            model.net,
            ["loss"],
            set(viewvalues(model.param_to_grad)),
            "",
            share_activations=True,
            dont_share_blobs=set(),
        )
        blobs_after = count_blobs(optim_proto)
        self.assertLess(blobs_after, blobs_before)

        # Run once to see all blobs are set up correctly
        for init_blob in init_blobs:
            workspace.FeedBlob(init_blob, np.zeros(
                [1, 4, 10], dtype=np.float32
            ))
        workspace.FeedBlob("seq_lengths", np.array([T] * 4, dtype=np.int32))
        workspace.FeedBlob("labels", np.random.rand(T).astype(np.int32))

        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(model.net)
Example #5
    def init_lstm_model(self, T, num_layers, forward_only, use_loss=True):
        workspace.FeedBlob("seq_lengths",
                           np.array([T] * self.batch_size, dtype=np.int32))
        workspace.FeedBlob(
            "target",
            np.random.rand(T, self.batch_size,
                           self.hidden_dim).astype(np.float32))
        workspace.FeedBlob(
            "hidden_init",
            np.zeros([1, self.batch_size, self.hidden_dim], dtype=np.float32))
        workspace.FeedBlob(
            "cell_init",
            np.zeros([1, self.batch_size, self.hidden_dim], dtype=np.float32))

        model = model_helper.ModelHelper(name="lstm")
        model.net.AddExternalInputs(["input"])

        init_blobs = []
        for i in range(num_layers):
            hidden_init, cell_init = model.net.AddExternalInputs(
                "hidden_init_{}".format(i), "cell_init_{}".format(i))
            init_blobs.extend([hidden_init, cell_init])

        output, last_hidden, _, last_state = rnn_cell.LSTM(
            model=model,
            input_blob="input",
            seq_lengths="seq_lengths",
            initial_states=init_blobs,
            dim_in=self.input_dim,
            dim_out=[self.hidden_dim] * num_layers,
            scope="",
            drop_states=True,
            forward_only=forward_only,
            return_last_layer_only=True,
        )

        if use_loss:
            loss = model.AveragedLoss(
                model.SquaredL2Distance([output, "target"], "dist"), "loss")
            # Add gradient ops
            if not forward_only:
                model.AddGradientOperators([loss])

        # init
        for init_blob in init_blobs:
            workspace.FeedBlob(
                init_blob,
                np.zeros([1, self.batch_size, self.hidden_dim],
                         dtype=np.float32))

        return model, output
Example #6
def rnn_unidirectional_encoder(model, embedded_inputs, input_lengths,
                               initial_hidden_state, initial_cell_state,
                               embedding_size, encoder_num_units,
                               use_attention):
    """ Unidirectional (forward pass) LSTM encoder."""

    outputs, final_hidden_state, _, final_cell_state = rnn_cell.LSTM(
        model=model,
        input_blob=embedded_inputs,
        seq_lengths=input_lengths,
        initial_states=(initial_hidden_state, initial_cell_state),
        dim_in=embedding_size,
        dim_out=encoder_num_units,
        scope='encoder',
        outputs_with_grads=([0] if use_attention else [1, 3]),
    )
    return outputs, final_hidden_state, final_cell_state
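As the examples elsewhere in this listing show, rnn_cell.LSTM returns (hidden_all, hidden_last, cell_all, cell_last), so outputs_with_grads=[0] routes gradients through the full output sequence (which an attention layer consumes), while [1, 3] routes them only through the final hidden and cell states. A hedged usage sketch for this helper (blob names and sizes are illustrative):

from caffe2.python import model_helper

model = model_helper.ModelHelper(name="encoder_sketch")
embedded, lengths, h0, c0 = model.net.AddExternalInputs(
    "embedded_inputs", "input_lengths", "hidden_init", "cell_init")
outputs, final_h, final_c = rnn_unidirectional_encoder(
    model, embedded, lengths, h0, c0,
    embedding_size=128, encoder_num_units=256, use_attention=True)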
Example #7
    def test_lstm_params(self):
        model = ModelHelper(name="lstm_params_test")

        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU, 0)):
            output, _, _, _ = rnn_cell.LSTM(
                model=model,
                input_blob="input",
                seq_lengths="seqlengths",
                initial_states=None,
                dim_in=20,
                dim_out=40,
                scope="test",
                drop_states=True,
                return_last_layer_only=True,
            )
        for param in model.GetParams():
            self.assertNotEqual(model.get_param_info(param), None)
Example #8
    def test_multi_lstm(
        self,
        input_length,
        dim_in,
        max_num_units,
        num_layers,
        batch_size,
    ):
        model = ModelHelper(name='external')
        (
            input_sequence,
            seq_lengths,
        ) = model.net.AddExternalInputs(
            'input_sequence',
            'seq_lengths',
        )
        dim_out = [
            np.random.randint(1, max_num_units + 1)
            for _ in range(num_layers)
        ]
        h_all, h_last, c_all, c_last = rnn_cell.LSTM(
            model=model,
            input_blob=input_sequence,
            seq_lengths=seq_lengths,
            initial_states=None,
            dim_in=dim_in,
            dim_out=dim_out,
            scope='test',
            outputs_with_grads=(0,),
            return_params=False,
            memory_optimization=False,
            forget_bias=0.0,
            forward_only=False,
            return_last_layer_only=True,
        )

        workspace.RunNetOnce(model.param_init_net)

        seq_lengths_val = np.random.randint(
            1,
            input_length + 1,
            size=(batch_size,),
        ).astype(np.int32)
        input_sequence_val = np.random.randn(
            input_length,
            batch_size,
            dim_in,
        ).astype(np.float32)
        workspace.FeedBlob(seq_lengths, seq_lengths_val)
        workspace.FeedBlob(input_sequence, input_sequence_val)

        hidden_input_list = []
        cell_input_list = []
        i2h_w_list = []
        i2h_b_list = []
        gates_w_list = []
        gates_b_list = []

        for i in range(num_layers):
            hidden_input_list.append(
                workspace.FetchBlob('test/initial_hidden_state_{}'.format(i)),
            )
            cell_input_list.append(
                workspace.FetchBlob('test/initial_cell_state_{}'.format(i)),
            )
            i2h_w_list.append(
                workspace.FetchBlob('test/layer_{}/i2h_w'.format(i)),
            )
            i2h_b_list.append(
                workspace.FetchBlob('test/layer_{}/i2h_b'.format(i)),
            )
            gates_w_list.append(
                workspace.FetchBlob('test/layer_{}/gates_t_w'.format(i)),
            )
            gates_b_list.append(
                workspace.FetchBlob('test/layer_{}/gates_t_b'.format(i)),
            )

        workspace.RunNetOnce(model.net)
        h_all_calc = workspace.FetchBlob(h_all)
        h_last_calc = workspace.FetchBlob(h_last)
        c_all_calc = workspace.FetchBlob(c_all)
        c_last_calc = workspace.FetchBlob(c_last)

        h_all_ref, h_last_ref, c_all_ref, c_last_ref = multi_lstm_reference(
            input_sequence_val,
            hidden_input_list,
            cell_input_list,
            i2h_w_list,
            i2h_b_list,
            gates_w_list,
            gates_b_list,
            seq_lengths_val,
            forget_bias=0.0,
        )

        h_all_delta = np.abs(h_all_ref - h_all_calc).sum()
        h_last_delta = np.abs(h_last_ref - h_last_calc).sum()
        c_all_delta = np.abs(c_all_ref - c_all_calc).sum()
        c_last_delta = np.abs(c_last_ref - c_last_calc).sum()

        self.assertAlmostEqual(h_all_delta, 0.0, places=5)
        self.assertAlmostEqual(h_last_delta, 0.0, places=5)
        self.assertAlmostEqual(c_all_delta, 0.0, places=5)
        self.assertAlmostEqual(c_last_delta, 0.0, places=5)

        input_values = {
            'input_sequence': input_sequence_val,
            'seq_lengths': seq_lengths_val,
        }
        for param in model.GetParams():
            value = workspace.FetchBlob(param)
            input_values[str(param)] = value

        output_sum = model.net.SumElements(
            [h_all],
            'output_sum',
            average=True,
        )
        fake_loss = model.net.Tanh(
            output_sum,
        )
        for param in model.GetParams():
            gradient_checker.NetGradientChecker.Check(
                model.net,
                outputs_with_grad=[fake_loss],
                input_values=input_values,
                input_to_check=str(param),
                print_net=False,
                step_size=0.0001,
                threshold=0.05,
            )
Example #9
    def testEqualToCudnn(self):
        with core.DeviceScope(core.DeviceOption(workspace.GpuDeviceType)):
            T = 8
            batch_size = 4
            input_dim = 8
            hidden_dim = 31

            workspace.FeedBlob("seq_lengths",
                               np.array([T] * batch_size, dtype=np.int32))
            workspace.FeedBlob(
                "target",
                np.zeros([T, batch_size, hidden_dim], dtype=np.float32))
            workspace.FeedBlob(
                "hidden_init",
                np.zeros([1, batch_size, hidden_dim], dtype=np.float32))
            workspace.FeedBlob(
                "cell_init",
                np.zeros([1, batch_size, hidden_dim], dtype=np.float32))

            own_model = model_helper.ModelHelper(name="own_lstm")

            input_shape = [T, batch_size, input_dim]
            cudnn_model = model_helper.ModelHelper(name="cudnn_lstm")
            input_blob = cudnn_model.param_init_net.UniformFill(
                [], "input", shape=input_shape)
            workspace.FeedBlob(
                "CUDNN/hidden_init_cudnn",
                np.zeros([1, batch_size, hidden_dim], dtype=np.float32))
            workspace.FeedBlob(
                "CUDNN/cell_init_cudnn",
                np.zeros([1, batch_size, hidden_dim], dtype=np.float32))

            cudnn_output, cudnn_last_hidden, cudnn_last_state, param_extract = rnn_cell.cudnn_LSTM(
                model=cudnn_model,
                input_blob=input_blob,
                initial_states=("hidden_init_cudnn", "cell_init_cudnn"),
                dim_in=input_dim,
                dim_out=hidden_dim,
                scope="CUDNN",
                return_params=True,
            )
            cudnn_loss = cudnn_model.AveragedLoss(
                cudnn_model.SquaredL2Distance([cudnn_output, "target"],
                                              "CUDNN/dist"), "CUDNN/loss")

            own_output, own_last_hidden, _, own_last_state, own_params = rnn_cell.LSTM(
                model=own_model,
                input_blob=input_blob,
                seq_lengths="seq_lengths",
                initial_states=("hidden_init", "cell_init"),
                dim_in=input_dim,
                dim_out=hidden_dim,
                scope="OWN",
                return_params=True,
            )
            own_loss = own_model.AveragedLoss(
                own_model.SquaredL2Distance([own_output, "target"],
                                            "OWN/dist"), "OWN/loss")

            # Add gradients
            cudnn_model.AddGradientOperators([cudnn_loss])
            own_model.AddGradientOperators([own_loss])

            # Add parameter updates
            LR = cudnn_model.param_init_net.ConstantFill([],
                                                         shape=[1],
                                                         value=0.01)
            ONE = cudnn_model.param_init_net.ConstantFill([],
                                                          shape=[1],
                                                          value=1.0)
            for param in cudnn_model.GetParams():
                cudnn_model.WeightedSum(
                    [param, ONE, cudnn_model.param_to_grad[param], LR], param)
            for param in own_model.GetParams():
                own_model.WeightedSum(
                    [param, ONE, own_model.param_to_grad[param], LR], param)

            # Copy states over
            own_model.net.Copy(own_last_hidden, "hidden_init")
            own_model.net.Copy(own_last_state, "cell_init")
            cudnn_model.net.Copy(cudnn_last_hidden, "CUDNN/hidden_init_cudnn")
            cudnn_model.net.Copy(cudnn_last_state, "CUDNN/cell_init_cudnn")

            workspace.RunNetOnce(cudnn_model.param_init_net)
            workspace.CreateNet(cudnn_model.net)

            ##
            ##  CUDNN LSTM MODEL EXECUTION
            ##
            # Get initial values from CuDNN LSTM so we can feed them
            # to our own.
            (param_extract_net, param_extract_mapping) = param_extract
            workspace.RunNetOnce(param_extract_net)
            cudnn_lstm_params = {
                input_type:
                {k: workspace.FetchBlob(v[0])
                 for k, v in viewitems(pars)}
                for input_type, pars in viewitems(param_extract_mapping)
            }

            # Run the model 3 times, so that some parameter updates are done
            workspace.RunNet(cudnn_model.net.Proto().name, 3)

            ##
            ## OWN LSTM MODEL EXECUTION
            ##
            # Map the cuDNN parameters to our own
            workspace.RunNetOnce(own_model.param_init_net)
            rnn_cell.InitFromLSTMParams(own_params, cudnn_lstm_params)

            # Run the model 3 times, so that some parameter updates are done
            workspace.CreateNet(own_model.net)
            workspace.RunNet(own_model.net.Proto().name, 3)

            ##
            ## COMPARE RESULTS
            ##
            # Then compare that final results after 3 runs are equal
            own_output_data = workspace.FetchBlob(own_output)
            own_last_hidden = workspace.FetchBlob(own_last_hidden)
            own_loss = workspace.FetchBlob(own_loss)

            cudnn_output_data = workspace.FetchBlob(cudnn_output)
            cudnn_last_hidden = workspace.FetchBlob(cudnn_last_hidden)
            cudnn_loss = workspace.FetchBlob(cudnn_loss)

            self.assertTrue(np.allclose(own_output_data, cudnn_output_data))
            self.assertTrue(np.allclose(own_last_hidden, cudnn_last_hidden))
            self.assertTrue(np.allclose(own_loss, cudnn_loss))
Example #10
def make_cell(*args, **kwargs):
    return rnn_cell.LSTM(*args, **kwargs)
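Wrappers like this keep call sites short by giving every cell the same construction interface; the same effect can be had by pre-binding shared keyword arguments, e.g. with functools.partial (the pinned values below are illustrative):

import functools
from caffe2.python import rnn_cell

# A make_cell variant with some defaults baked in; all remaining
# arguments are passed through to rnn_cell.LSTM unchanged.
make_forward_cell = functools.partial(
    rnn_cell.LSTM, forward_only=True, drop_states=True)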
Example #11
        def make_lstm(direction_offset):
            name = dummy_name()

            # input and recurrence biases are squashed together in
            # onnx but not in caffe2

            bias_offset = 8 * direction_offset * hidden_size
            Bi = init_net.Slice(B, name + "_bias_i2h",
                                starts=[bias_offset + 0 * hidden_size],
                                ends  =[bias_offset + 4 * hidden_size])
            Br = init_net.Slice(B, name + "_bias_gates",
                                starts=[bias_offset + 4 * hidden_size],
                                ends  =[bias_offset + 8 * hidden_size])

            weight_offset = 4 * direction_offset * hidden_size
            W_ = init_net.Slice(W, name + '/i2h_w_pre',
                                starts=[weight_offset + 0 * hidden_size, 0],
                                ends  =[weight_offset + 4 * hidden_size,-1])
            R_ = init_net.Slice(R, name + '/gates_t_w_pre',
                                starts=[weight_offset + 0 * hidden_size, 0],
                                ends  =[weight_offset + 4 * hidden_size,-1])

            # caffe2 has a different order from onnx. We need to rearrange
            #   i o f c -> i f o c
            reforms = ((W_, 'i2h_w',     [(0, -1)]),
                       (R_, 'gates_t_w', [(0, -1)]),
                       (Bi, 'i2h_b'    , []),
                       (Br, 'gates_t_b', []))
            for name_from, name_to, extra_dims in reforms:
                xi, xo, xf, xc = [name_from + suffix for suffix in ("_i", "_o", "_f", "_c")]
                for i, x in enumerate([xi, xo, xf, xc]):
                    dim0 = i * hidden_size, (i+1) * hidden_size
                    starts, ends = zip(dim0, *extra_dims)
                    init_net.Slice(name_from, x, starts=starts, ends=ends)
                init_net.Concat([xi, xf, xo, xc], ['%s/%s' % (name, name_to), dummy_name()], axis=0)

            initial_h_sliced = name + '/initial_h'
            init_net.Slice(initial_h, initial_h_sliced,
                           starts=[direction_offset + 0, 0, 0],
                           ends  =[direction_offset + 1,-1,-1])
            initial_c_sliced = name + '/initial_c'
            init_net.Slice(initial_c, initial_c_sliced,
                           starts=[direction_offset + 0, 0, 0],
                           ends  =[direction_offset + 1,-1,-1])

            if direction_offset == 1:
                input = pred_mh.net.ReversePackedSegs(
                    [input_blob, sequence_lens], name + "/input-reversed")
            else:
                input = input_blob

            hidden_t_all, hidden_t_last, _, _, params = rnn_cell.LSTM(
                pred_mh,
                input,
                sequence_lens,
                [initial_h_sliced, initial_c_sliced],
                input_size,
                hidden_size,
                name,
                drop_states=True,
                forward_only=True,
                return_params=True
            )

            if direction_offset == 1:
                hidden_t_all = pred_mh.net.ReversePackedSegs(
                    [hidden_t_all, sequence_lens], name + "/output-reversed")

            return hidden_t_all, hidden_t_last
Example #12
def rnn_bidirectional_encoder(
    model,
    embedded_inputs,
    input_lengths,
    initial_hidden_state,
    initial_cell_state,
    embedding_size,
    encoder_num_units,
    use_attention,
    scope=None,
):
    """ Bidirectional (forward pass and backward pass) LSTM encoder."""

    # Forward pass
    (
        outputs_fw,
        final_hidden_state_fw,
        _,
        final_cell_state_fw,
    ) = rnn_cell.LSTM(
        model=model,
        input_blob=embedded_inputs,
        seq_lengths=input_lengths,
        initial_states=(initial_hidden_state, initial_cell_state),
        dim_in=embedding_size,
        dim_out=encoder_num_units,
        scope=(scope + '/' if scope else '') + 'forward_encoder',
        outputs_with_grads=([0] if use_attention else [1, 3]),
    )

    # Backward pass
    reversed_embedded_inputs = model.net.ReversePackedSegs(
        [embedded_inputs, input_lengths],
        ['reversed_embedded_inputs'],
    )

    (
        outputs_bw,
        final_hidden_state_bw,
        _,
        final_cell_state_bw,
    ) = rnn_cell.LSTM(
        model=model,
        input_blob=reversed_embedded_inputs,
        seq_lengths=input_lengths,
        initial_states=(initial_hidden_state, initial_cell_state),
        dim_in=embedding_size,
        dim_out=encoder_num_units,
        scope=(scope + '/' if scope else '') + 'backward_encoder',
        outputs_with_grads=([0] if use_attention else [1, 3]),
    )

    outputs_bw = model.net.ReversePackedSegs(
        [outputs_bw, input_lengths],
        ['outputs_bw'],
    )

    # Concatenate forward and backward results
    outputs, _ = model.net.Concat(
        [outputs_fw, outputs_bw],
        ['outputs', 'outputs_dim'],
        axis=2,
    )

    final_hidden_state, _ = model.net.Concat(
        [final_hidden_state_fw, final_hidden_state_bw],
        ['final_hidden_state', 'final_hidden_state_dim'],
        axis=2,
    )

    final_cell_state, _ = model.net.Concat(
        [final_cell_state_fw, final_cell_state_bw],
        ['final_cell_state', 'final_cell_state_dim'],
        axis=2,
    )
    return outputs, final_hidden_state, final_cell_state
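Because the forward and backward results are concatenated along axis 2, every blob this encoder returns is twice as wide as its unidirectional counterpart, i.e. 2 * encoder_num_units, and downstream consumers must size themselves accordingly. A quick numpy check of that shape arithmetic (sizes are made up):

import numpy as np

T, batch, units = 5, 4, 256  # illustrative sizes
outputs_fw = np.zeros((T, batch, units), dtype=np.float32)
outputs_bw = np.zeros((T, batch, units), dtype=np.float32)
outputs = np.concatenate([outputs_fw, outputs_bw], axis=2)
assert outputs.shape == (T, batch, 2 * units)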
Example #13
    def test_observer_rnn_executor(self, num_layers, forward_only):
        '''
        Test that the RNN executor produces the same results as
        the non-executor (i.e. running step nets as a sequence of simple nets).
        '''

        Tseq = [2, 3, 4]
        batch_size = 10
        input_dim = 3
        hidden_dim = 3

        run_cnt = [0] * len(Tseq)
        avg_time = [0] * len(Tseq)
        for j in range(len(Tseq)):
            T = Tseq[j]

            ws.ResetWorkspace()
            ws.FeedBlob("seq_lengths",
                        np.array([T] * batch_size, dtype=np.int32))
            ws.FeedBlob(
                "target",
                np.random.rand(T, batch_size, hidden_dim).astype(np.float32))
            ws.FeedBlob(
                "hidden_init",
                np.zeros([1, batch_size, hidden_dim], dtype=np.float32))
            ws.FeedBlob(
                "cell_init",
                np.zeros([1, batch_size, hidden_dim], dtype=np.float32))

            model = model_helper.ModelHelper(name="lstm")
            model.net.AddExternalInputs(["input"])

            init_blobs = []
            for i in range(num_layers):
                hidden_init, cell_init = model.net.AddExternalInputs(
                    "hidden_init_{}".format(i), "cell_init_{}".format(i))
                init_blobs.extend([hidden_init, cell_init])

            output, last_hidden, _, last_state = rnn_cell.LSTM(
                model=model,
                input_blob="input",
                seq_lengths="seq_lengths",
                initial_states=init_blobs,
                dim_in=input_dim,
                dim_out=[hidden_dim] * num_layers,
                drop_states=True,
                forward_only=forward_only,
                return_last_layer_only=True,
            )

            loss = model.AveragedLoss(
                model.SquaredL2Distance([output, "target"], "dist"), "loss")
            # Add gradient ops
            if not forward_only:
                model.AddGradientOperators([loss])

            # init
            for init_blob in init_blobs:
                ws.FeedBlob(
                    init_blob,
                    np.zeros([1, batch_size, hidden_dim], dtype=np.float32))
            ws.RunNetOnce(model.param_init_net)

            # Run with executor
            self.enable_rnn_executor(model.net, 1, forward_only)

            np.random.seed(10022015)
            input_shape = [T, batch_size, input_dim]
            ws.FeedBlob("input",
                        np.random.rand(*input_shape).astype(np.float32))
            ws.FeedBlob(
                "target",
                np.random.rand(T, batch_size, hidden_dim).astype(np.float32))
            ws.CreateNet(model.net, overwrite=True)

            time_ob = model.net.AddObserver("TimeObserver")
            run_cnt_ob = model.net.AddObserver("RunCountObserver")
            ws.RunNet(model.net)
            avg_time[j] = time_ob.average_time()
            run_cnt[j] = int(''.join(x for x in run_cnt_ob.debug_info()
                                     if x.isdigit()))
            model.net.RemoveObserver(time_ob)
            model.net.RemoveObserver(run_cnt_ob)

        print(avg_time)
        print(run_cnt)
        self.assertTrue(run_cnt[1] > run_cnt[0] and run_cnt[2] > run_cnt[1])
        self.assertEqual(run_cnt[1] - run_cnt[0], run_cnt[2] - run_cnt[1])
Example #14
    def test_lstm_equal_simplenet(self, num_layers, T, forward_only, gc, dc):
        '''
        Test that the RNN executor produces the same results as
        the non-executor (i.e. running step nets as a sequence of simple nets).
        '''
        self.Tseq = [T, T // 2, T // 2 + T // 4, T, T // 2 + 1]

        workspace.ResetWorkspace()
        with core.DeviceScope(gc):
            print("Run with device: {}, forward only: {}".format(
                gc, forward_only))

            workspace.FeedBlob("seq_lengths",
                               np.array([T] * self.batch_size, dtype=np.int32))
            workspace.FeedBlob(
                "target",
                np.random.rand(T, self.batch_size,
                               self.hidden_dim).astype(np.float32))
            workspace.FeedBlob(
                "hidden_init",
                np.zeros([1, self.batch_size, self.hidden_dim],
                         dtype=np.float32))
            workspace.FeedBlob(
                "cell_init",
                np.zeros([1, self.batch_size, self.hidden_dim],
                         dtype=np.float32))

            model = model_helper.ModelHelper(name="lstm")
            model.net.AddExternalInputs(["input"])

            init_blobs = []
            for i in range(num_layers):
                hidden_init, cell_init = model.net.AddExternalInputs(
                    "hidden_init_{}".format(i), "cell_init_{}".format(i))
                init_blobs.extend([hidden_init, cell_init])

            output, last_hidden, _, last_state = rnn_cell.LSTM(
                model=model,
                input_blob="input",
                seq_lengths="seq_lengths",
                initial_states=init_blobs,
                dim_in=self.input_dim,
                dim_out=[self.hidden_dim] * num_layers,
                scope="",
                drop_states=True,
                forward_only=forward_only,
                return_last_layer_only=True,
            )

            loss = model.AveragedLoss(
                model.SquaredL2Distance([output, "target"], "dist"), "loss")
            # Add gradient ops
            if not forward_only:
                model.AddGradientOperators([loss])

            # init
            for init_blob in init_blobs:
                workspace.FeedBlob(
                    init_blob,
                    np.zeros([1, self.batch_size, self.hidden_dim],
                             dtype=np.float32))

            self._compare(model, forward_only)
Example #15
    def test_lstm_extract_predictor_net(self):
        model = ModelHelper(name="lstm_extract_test")

        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU, 0)):
            output, _, _, _ = rnn_cell.LSTM(
                model=model,
                input_blob="input",
                seq_lengths="seqlengths",
                initial_states=("hidden_init", "cell_init"),
                dim_in=20,
                dim_out=40,
                scope="test",
                drop_states=True,
                return_last_layer_only=True,
            )
        # Run param init net to get the shapes for all inputs
        shapes = {}
        workspace.RunNetOnce(model.param_init_net)
        for b in workspace.Blobs():
            shapes[b] = workspace.FetchBlob(b).shape

        # But export in CPU
        (predict_net, export_blobs) = ExtractPredictorNet(
            net_proto=model.net.Proto(),
            input_blobs=["input"],
            output_blobs=[output],
            device=core.DeviceOption(caffe2_pb2.CPU, 1),
        )

        # Create the net and run once to see it is valid
        # Populate external inputs with correctly shaped random input
        # and also ensure that the export_blobs was constructed correctly.
        workspace.ResetWorkspace()
        shapes['input'] = [10, 4, 20]
        shapes['cell_init'] = [1, 4, 40]
        shapes['hidden_init'] = [1, 4, 40]

        print(predict_net.Proto().external_input)
        self.assertTrue('seqlengths' in predict_net.Proto().external_input)
        for einp in predict_net.Proto().external_input:
            if einp == 'seqlengths':
                workspace.FeedBlob(
                    "seqlengths",
                    np.array([10] * 4, dtype=np.int32)
                )
            else:
                workspace.FeedBlob(
                    einp,
                    np.zeros(shapes[einp]).astype(np.float32),
                )
                if einp != 'input':
                    self.assertTrue(einp in export_blobs)

        print(str(predict_net.Proto()))
        self.assertTrue(workspace.CreateNet(predict_net.Proto()))
        self.assertTrue(workspace.RunNet(predict_net.Proto().name))

        # Validate device options set correctly for the RNNs
        import google.protobuf.text_format as protobuftx
        for op in predict_net.Proto().op:
            if op.type == 'RecurrentNetwork':
                for arg in op.arg:
                    if arg.name == "step_net":
                        step_proto = caffe2_pb2.NetDef()
                        protobuftx.Merge(arg.s.decode("ascii"), step_proto)
                        for step_op in step_proto.op:
                            self.assertEqual(0, step_op.device_option.device_type)
                            self.assertEqual(1, step_op.device_option.cuda_gpu_id)
                    elif arg.name == 'backward_step_net':
                        self.assertEqual(b"", arg.s)
Example #16
    def model_build_fun(self, model, forward_only=False, loss_scale=None):
        encoder_inputs = model.net.AddExternalInput(
            workspace.GetNameScope() + 'encoder_inputs', )
        encoder_lengths = model.net.AddExternalInput(
            workspace.GetNameScope() + 'encoder_lengths', )
        decoder_inputs = model.net.AddExternalInput(
            workspace.GetNameScope() + 'decoder_inputs', )
        decoder_lengths = model.net.AddExternalInput(
            workspace.GetNameScope() + 'decoder_lengths', )
        targets = model.net.AddExternalInput(
            workspace.GetNameScope() + 'targets', )
        target_weights = model.net.AddExternalInput(
            workspace.GetNameScope() + 'target_weights', )
        attention_type = self.model_params['attention']
        assert attention_type in ['none', 'regular']

        (
            encoder_outputs,
            weighted_encoder_outputs,
            final_encoder_hidden_state,
            final_encoder_cell_state,
            encoder_output_dim,
        ) = seq2seq_util.build_embedding_encoder(
            model=model,
            encoder_params=self.encoder_params,
            inputs=encoder_inputs,
            input_lengths=encoder_lengths,
            vocab_size=self.source_vocab_size,
            embeddings=self.encoder_embeddings,
            embedding_size=self.model_params['encoder_embedding_size'],
            use_attention=(attention_type != 'none'),
            num_gpus=self.num_gpus,
        )

        assert len(self.model_params['decoder_layer_configs']) == 1
        decoder_num_units = (
            self.model_params['decoder_layer_configs'][0]['num_units'])
        initial_states = seq2seq_util.build_initial_rnn_decoder_states(
            model=model,
            encoder_num_units=encoder_output_dim,
            decoder_num_units=decoder_num_units,
            final_encoder_hidden_state=final_encoder_hidden_state,
            final_encoder_cell_state=final_encoder_cell_state,
            use_attention=(attention_type != 'none'),
        )

        if self.num_gpus == 0:
            embedded_decoder_inputs = model.net.Gather(
                [self.decoder_embeddings, decoder_inputs],
                ['embedded_decoder_inputs'],
            )
        else:
            with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
                embedded_decoder_inputs_cpu = model.net.Gather(
                    [self.decoder_embeddings, decoder_inputs],
                    ['embedded_decoder_inputs_cpu'],
                )
            embedded_decoder_inputs = model.CopyCPUToGPU(
                embedded_decoder_inputs_cpu,
                'embedded_decoder_inputs',
            )

        # seq_len x batch_size x decoder_embedding_size
        if attention_type == 'none':
            decoder_outputs, _, _, _ = rnn_cell.LSTM(
                model=model,
                input_blob=embedded_decoder_inputs,
                seq_lengths=decoder_lengths,
                initial_states=initial_states,
                dim_in=self.model_params['decoder_embedding_size'],
                dim_out=decoder_num_units,
                scope='decoder',
                outputs_with_grads=[0],
            )
            decoder_output_size = decoder_num_units
        else:
            (decoder_outputs, _, _, _, attention_weighted_encoder_contexts,
             _) = rnn_cell.LSTMWithAttention(
                 model=model,
                 decoder_inputs=embedded_decoder_inputs,
                 decoder_input_lengths=decoder_lengths,
                 initial_decoder_hidden_state=initial_states[0],
                 initial_decoder_cell_state=initial_states[1],
                 initial_attention_weighted_encoder_context=initial_states[2],
                 encoder_output_dim=encoder_output_dim,
                 encoder_outputs=encoder_outputs,
                 decoder_input_dim=self.model_params['decoder_embedding_size'],
                 decoder_state_dim=decoder_num_units,
                 scope='decoder',
                 outputs_with_grads=[0, 4],
             )
            decoder_outputs, _ = model.net.Concat(
                [decoder_outputs, attention_weighted_encoder_contexts],
                [
                    'states_and_context_combination',
                    '_states_and_context_combination_concat_dims',
                ],
                axis=2,
            )
            decoder_output_size = decoder_num_units + encoder_output_dim

        # we do softmax over the whole sequence
        # (max_length in the batch * batch_size) x decoder embedding size
        # -1 because we don't know max_length yet
        decoder_outputs_flattened, _ = model.net.Reshape(
            [decoder_outputs],
            [
                'decoder_outputs_flattened',
                'decoder_outputs_and_contexts_combination_old_shape',
            ],
            shape=[-1, decoder_output_size],
        )
        output_logits = seq2seq_util.output_projection(
            model=model,
            decoder_outputs=decoder_outputs_flattened,
            decoder_output_size=decoder_output_size,
            target_vocab_size=self.target_vocab_size,
            decoder_softmax_size=self.model_params['decoder_softmax_size'],
        )
        targets, _ = model.net.Reshape(
            [targets],
            ['targets', 'targets_old_shape'],
            shape=[-1],
        )
        target_weights, _ = model.net.Reshape(
            [target_weights],
            ['target_weights', 'target_weights_old_shape'],
            shape=[-1],
        )
        output_probs = model.net.Softmax(
            [output_logits],
            ['output_probs'],
            engine=('CUDNN' if self.num_gpus > 0 else None),
        )
        label_cross_entropy = model.net.LabelCrossEntropy(
            [output_probs, targets],
            ['label_cross_entropy'],
        )
        weighted_label_cross_entropy = model.net.Mul(
            [label_cross_entropy, target_weights],
            'weighted_label_cross_entropy',
        )
        total_loss_scalar = model.net.SumElements(
            [weighted_label_cross_entropy],
            'total_loss_scalar',
        )
        total_loss_scalar_weighted = model.net.Scale(
            [total_loss_scalar],
            'total_loss_scalar_weighted',
            scale=1.0 / self.batch_size,
        )
        return [total_loss_scalar_weighted]
Example #17
def create_model(args, queue, label_queue, input_shape):
    model = model_helper.ModelHelper(name="LSTM_bench")
    seq_lengths, target = \
        model.net.AddExternalInputs(
            'seq_lengths',
            'target',
        )

    input_blob = model.net.DequeueBlobs(queue, "input_data")
    labels = model.net.DequeueBlobs(label_queue, "label")

    init_blobs = []
    if args.implementation in ["own", "static", "static_dag"]:
        T = None
        if "static" in args.implementation:
            assert args.fixed_shape, \
                "Random input length is not static RNN compatible"
            T = args.seq_length
            print("Using static RNN of size {}".format(T))

        for i in range(args.num_layers):
            hidden_init, cell_init = model.net.AddExternalInputs(
                "hidden_init_{}".format(i), "cell_init_{}".format(i))
            init_blobs.extend([hidden_init, cell_init])

        output, last_hidden, _, last_state = rnn_cell.LSTM(
            model=model,
            input_blob=input_blob,
            seq_lengths=seq_lengths,
            initial_states=init_blobs,
            dim_in=args.input_dim,
            dim_out=[args.hidden_dim] * args.num_layers,
            scope="lstm1",
            memory_optimization=args.memory_optimization,
            forward_only=args.forward_only,
            drop_states=True,
            return_last_layer_only=True,
            static_rnn_unroll_size=T,
        )

        if "dag" in args.implementation:
            print("Using DAG net type")
            model.net.Proto().type = 'dag'
            model.net.Proto().num_workers = 4

    elif args.implementation == "cudnn":
        # We need to feed a placeholder input so that RecurrentInitOp
        # can infer the dimensions.
        init_blobs = model.net.AddExternalInputs("hidden_init", "cell_init")
        model.param_init_net.ConstantFill([], input_blob, shape=input_shape)
        output, last_hidden, _ = rnn_cell.cudnn_LSTM(
            model=model,
            input_blob=input_blob,
            initial_states=init_blobs,
            dim_in=args.input_dim,
            dim_out=args.hidden_dim,
            scope="cudnnlstm",
            num_layers=args.num_layers,
        )

    else:
        assert False, "Unknown implementation"

    weights = model.net.UniformFill(labels, "weights")
    softmax, loss = model.net.SoftmaxWithLoss(
        [model.Flatten(output), labels, weights],
        ['softmax', 'loss'],
    )

    if not args.forward_only:
        model.AddGradientOperators([loss])

    # carry states over
    for init_blob in init_blobs:
        model.net.Copy(last_hidden, init_blob)

        sz = args.hidden_dim
        if args.implementation == "cudnn":
            sz *= args.num_layers
        workspace.FeedBlob(
            init_blob, np.zeros([1, args.batch_size, sz], dtype=np.float32))

    if args.rnn_executor:
        for op in model.net.Proto().op:
            if op.type.startswith('RecurrentNetwork'):
                recurrent.set_rnn_executor_config(
                    op,
                    num_threads=args.rnn_executor_num_threads,
                    max_cuda_streams=args.rnn_executor_max_cuda_streams,
                )
    return model, output
Example #18
def create_model(args, queue, label_queue, input_shape):
    model = cnn.CNNModelHelper(name="LSTM_bench")
    seq_lengths, target = \
        model.net.AddExternalInputs(
            'seq_lengths',
            'target',
        )

    input_blob = model.DequeueBlobs(queue, "input_data")
    labels = model.DequeueBlobs(label_queue, "label")

    init_blobs = []
    if args.implementation == "own":
        for i in range(args.num_layers):
            init_blobs.append("hidden_init_{}".format(i))
            init_blobs.append("cell_init_{}".format(i))
        model.net.AddExternalInputs(init_blobs)

        output, last_hidden, _, last_state = rnn_cell.LSTM(
            model=model,
            input_blob=input_blob,
            seq_lengths=seq_lengths,
            initial_states=init_blobs,
            dim_in=args.input_dim,
            dim_out=[args.hidden_dim] * args.num_layers,
            scope="lstm1",
            memory_optimization=args.memory_optimization,
            forward_only=args.forward_only,
            drop_states=True,
            return_last_layer_only=True,
        )
    elif args.implementation == "cudnn":
        # We need to feed a placeholder input so that RecurrentInitOp
        # can infer the dimensions.
        init_blobs = model.net.AddExternalInputs("hidden_init", "cell_init")
        model.param_init_net.ConstantFill([], input_blob, shape=input_shape)
        output, last_hidden, _ = rnn_cell.cudnn_LSTM(
            model=model,
            input_blob=input_blob,
            initial_states=init_blobs,
            dim_in=args.input_dim,
            dim_out=args.hidden_dim,
            scope="cudnnlstm",
            num_layers=args.num_layers,
        )

    else:
        assert False, "Unknown implementation"

    weights = model.UniformFill(labels, "weights")
    softmax, loss = model.SoftmaxWithLoss(
        [model.Flatten(output), labels, weights],
        ['softmax', 'loss'],
    )

    if not args.forward_only:
        model.AddGradientOperators([loss])

    # carry states over
    for init_blob in init_blobs:
        model.net.Copy(last_hidden, init_blob)

        sz = args.hidden_dim
        if args.implementation == "cudnn":
            sz *= args.num_layers
        workspace.FeedBlob(init_blob, np.zeros(
            [1, args.batch_size, sz], dtype=np.float32
        ))
    return model, output