    def _test_three_recurrent_children(self, gpu):
        # Test if https://github.com/chainer/chainer/issues/6053 is addressed
        in_size = 2
        out_size = 6

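        # Three stacked recurrent links with different numbers of layers
        # (1, 2 and 5); the sizes chain as in_size -> 3 -> 4 -> out_size.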
        rseq = StatelessRecurrentSequential(
            L.NStepLSTM(1, in_size, 3, 0),
            L.NStepGRU(2, 3, 4, 0),
            L.NStepRNNTanh(5, 4, out_size, 0),
        )

        if gpu >= 0:
            chainer.cuda.get_device_from_id(gpu).use()
            rseq.to_gpu()
        xp = rseq.xp

        seqs_x = [
            xp.random.uniform(-1, 1, size=(4, in_size)).astype(np.float32),
            xp.random.uniform(-1, 1, size=(1, in_size)).astype(np.float32),
            xp.random.uniform(-1, 1, size=(3, in_size)).astype(np.float32),
        ]

        # Make a recurrent state and feed it back to check that the order of
        # states across the three recurrent children is handled correctly.
        _, rs = rseq.n_step_forward(seqs_x, None, output_mode='concat')
        _, _ = rseq.n_step_forward(seqs_x, rs, output_mode='concat')

        _, rs = rseq.n_step_forward(seqs_x, None, output_mode='split')
        _, _ = rseq.n_step_forward(seqs_x, rs, output_mode='split')

    def _test_n_step_forward_with_tuple_output(self, gpu):
        in_size = 5
        out_size = 6

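        # Split the (batch, 6) output along axis 1 into chunks of width
        # 2, 1, and 3.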
        def split_output(x):
            return tuple(F.split_axis(x, [2, 3], axis=1))

        rseq = StatelessRecurrentSequential(
            L.NStepRNNTanh(1, in_size, out_size, 0),
            split_output,
        )

        if gpu >= 0:
            chainer.cuda.get_device_from_id(gpu).use()
            rseq.to_gpu()
        xp = rseq.xp

        # Input is a list of two variables.
        seqs_x = [
            xp.random.uniform(-1, 1, size=(3, in_size)).astype(np.float32),
            xp.random.uniform(-1, 1, size=(2, in_size)).astype(np.float32),
        ]

        # Concatenated output should be a tuple of three variables.
        concat_out, concat_state = rseq.n_step_forward(seqs_x,
                                                       None,
                                                       output_mode='concat')
        self.assertIsInstance(concat_out, tuple)
        self.assertEqual(len(concat_out), 3)
        self.assertEqual(concat_out[0].shape, (5, 2))
        self.assertEqual(concat_out[1].shape, (5, 1))
        self.assertEqual(concat_out[2].shape, (5, 3))

        # Split output should be a list of two tuples, each of which is of
        # three variables.
        split_out, split_state = rseq.n_step_forward(seqs_x,
                                                     None,
                                                     output_mode='split')
        self.assertIsInstance(split_out, list)
        self.assertEqual(len(split_out), 2)
        self.assertIsInstance(split_out[0], tuple)
        self.assertIsInstance(split_out[1], tuple)
        for seq_x, seq_out in zip(seqs_x, split_out):
            self.assertEqual(len(seq_out), 3)
            self.assertEqual(seq_out[0].shape, (len(seq_x), 2))
            self.assertEqual(seq_out[1].shape, (len(seq_x), 1))
            self.assertEqual(seq_out[2].shape, (len(seq_x), 3))

        # Check if output_mode='concat' and output_mode='split' are consistent
        xp.testing.assert_allclose(
            F.concat([F.concat(seq_out, axis=1) for seq_out in split_out],
                     axis=0).array,
            F.concat(concat_out, axis=1).array,
        )

    def _test_n_step_forward_with_tuple_input(self, gpu):
        in_size = 5
        out_size = 3

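        # Concatenate the two inputs (widths 2 and 3) along axis 1 into a
        # single (batch, in_size) array.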
        def concat_input(*args):
            return F.concat(args, axis=1)

        rseq = StatelessRecurrentSequential(
            concat_input,
            L.NStepRNNTanh(1, in_size, out_size, 0),
        )

        if gpu >= 0:
            chainer.cuda.get_device_from_id(gpu).use()
            rseq.to_gpu()
        xp = rseq.xp

        # Input is a list of tuples, each of which contains two variables.
        seqs_x = [
            (xp.random.uniform(-1, 1, size=(3, 2)).astype(np.float32),
             xp.random.uniform(-1, 1, size=(3, 3)).astype(np.float32)),
            (xp.random.uniform(-1, 1, size=(1, 2)).astype(np.float32),
             xp.random.uniform(-1, 1, size=(1, 3)).astype(np.float32)),
        ]

        # Concatenated output should be a variable.
        concat_out, concat_state = rseq.n_step_forward(seqs_x,
                                                       None,
                                                       output_mode='concat')
        self.assertEqual(concat_out.shape, (4, out_size))

        # Split output should be a list of variables.
        split_out, split_state = rseq.n_step_forward(seqs_x,
                                                     None,
                                                     output_mode='split')
        self.assertIsInstance(split_out, list)
        self.assertEqual(len(split_out), len(seqs_x))
        for seq_x, seq_out in zip(seqs_x, split_out):
            self.assertEqual(seq_out.shape, (len(seq_x), out_size))

        # Check if output_mode='concat' and output_mode='split' are consistent
        xp.testing.assert_allclose(
            F.concat(split_out, axis=0).array,
            concat_out.array,
        )

    def _test_n_step_forward(self, gpu):
        in_size = 2
        out_size = 6

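        # Interleave recurrent and non-recurrent layers; the feature sizes
        # chain as in_size -> 3 -> 4 -> 5 -> out_size.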
        rseq = StatelessRecurrentSequential(
            L.Linear(in_size, 3),
            F.elu,
            L.NStepLSTM(1, 3, 4, 0),
            L.Linear(4, 5),
            L.NStepRNNTanh(1, 5, out_size, 0),
            F.tanh,
        )

        if gpu >= 0:
            chainer.cuda.get_device_from_id(gpu).use()
            rseq.to_gpu()
        xp = rseq.xp

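        # The layers are kept in constructor order (F.elu and F.tanh sit at
        # indices 1 and 5); grab the parametric ones to replicate the
        # computation manually below.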
        linear1 = rseq._layers[0]
        lstm = rseq._layers[2]
        linear2 = rseq._layers[3]
        rnn = rseq._layers[4]

        seqs_x = [
            xp.random.uniform(-1, 1, size=(4, in_size)).astype(np.float32),
            xp.random.uniform(-1, 1, size=(1, in_size)).astype(np.float32),
            xp.random.uniform(-1, 1, size=(3, in_size)).astype(np.float32),
        ]

        concat_out, concat_state = rseq.n_step_forward(
            seqs_x, None, output_mode='concat')
        self.assertEqual(concat_out.shape, (8, out_size))

        split_out, split_state = rseq.n_step_forward(
            seqs_x, None, output_mode='split')
        self.assertIsInstance(split_out, list)
        self.assertEqual(len(split_out), len(seqs_x))
        for seq_x, seq_out in zip(seqs_x, split_out):
            self.assertEqual(seq_out.shape, (len(seq_x), out_size))

        # Check if output_mode='concat' and output_mode='split' are consistent
        xp.testing.assert_allclose(
            F.concat(split_out, axis=0).array,
            concat_out.array,
        )

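        # The recurrent state has one entry per recurrent child, in order:
        # ((lstm_h, lstm_c), rnn_h).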
        (concat_lstm_h, concat_lstm_c), concat_rnn_h = concat_state
        (split_lstm_h, split_lstm_c), split_rnn_h = split_state
        xp.testing.assert_allclose(concat_lstm_h.array, split_lstm_h.array)
        xp.testing.assert_allclose(concat_lstm_c.array, split_lstm_c.array)
        xp.testing.assert_allclose(concat_rnn_h.array, split_rnn_h.array)

        # Check if the output matches that of step-by-step execution
        def manual_n_step_forward(seqs_x):
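            # transpose_sequence expects sequences sorted by length in
            # descending order and yields one mini-batch per time step.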
            sorted_seqs_x = sorted(seqs_x, key=len, reverse=True)
            transposed_x = F.transpose_sequence(sorted_seqs_x)
            lstm_h = None
            lstm_c = None
            rnn_h = None
            ys = []
            for batch in transposed_x:
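                # Shrink the recurrent states to the current batch size;
                # shorter sequences have already ended at this time step.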
                if lstm_h is not None:
                    lstm_h = lstm_h[:len(batch)]
                    lstm_c = lstm_c[:len(batch)]
                    rnn_h = rnn_h[:len(batch)]
                h = linear1(batch)
                h = F.elu(h)
                h, (lstm_h, lstm_c) = _step_lstm(lstm, h, (lstm_h, lstm_c))
                h = linear2(h)
                h, rnn_h = _step_rnn_tanh(rnn, h, rnn_h)
                y = F.tanh(h)
                ys.append(y)
            sorted_seqs_y = F.transpose_sequence(ys)
            # Undo the sort by length: sorted order is [seq0, seq2, seq1],
            # so map the outputs back to [seq0, seq1, seq2].
            seqs_y = [sorted_seqs_y[0], sorted_seqs_y[2], sorted_seqs_y[1]]
            return seqs_y

        manual_split_out = manual_n_step_forward(seqs_x)
        for man_seq_out, seq_out in zip(manual_split_out, split_out):
            xp.testing.assert_allclose(
                man_seq_out.array, seq_out.array, rtol=1e-5)

        # Finally, check the gradient (wrt linear1.W)
        concat_grad, = chainer.grad([F.sum(concat_out)], [linear1.W])
        split_grad, = chainer.grad(
            [F.sum(F.concat(split_out, axis=0))], [linear1.W])
        manual_split_grad, = chainer.grad(
            [F.sum(F.concat(manual_split_out, axis=0))], [linear1.W])
        xp.testing.assert_allclose(
            concat_grad.array, split_grad.array, rtol=1e-5)
        xp.testing.assert_allclose(
            concat_grad.array, manual_split_grad.array, rtol=1e-5)

    def _test_mask_recurrent_state_at(self, gpu):
        in_size = 2
        out_size = 4
        rseq = StatelessRecurrentSequential(
            L.Linear(in_size, 3),
            F.elu,
            L.NStepGRU(1, 3, out_size, 0),
            F.softmax,
        )
        if gpu >= 0:
            chainer.cuda.get_device_from_id(gpu).use()
            rseq.to_gpu()
        xp = rseq.xp
        seqs_x = [
            xp.random.uniform(-1, 1, size=(2, in_size)).astype(np.float32),
            xp.random.uniform(-1, 1, size=(2, in_size)).astype(np.float32),
        ]
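        # Both sequences have length 2, so transpose_sequence yields two
        # per-time-step batches of size 2.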
        transposed_x = F.transpose_sequence(seqs_x)

        def no_mask_n_step_forward():
            nomask_nstep_out, nstep_rs = rseq.n_step_forward(
                seqs_x, None, output_mode='concat')
            return F.reshape(nomask_nstep_out, (2, 2, out_size)), nstep_rs
        nstep_out, nstep_rs = no_mask_n_step_forward()

        # Check that n_step_forward and calling forward twice step by step
        # give the same results.
        def no_mask_forward_twice():
            _, rs = rseq(transposed_x[0], None)
            return rseq(transposed_x[1], rs)
        nomask_out, nomask_rs = no_mask_forward_twice()
        xp.testing.assert_allclose(
            nstep_out.array[:, 1],
            nomask_out.array,
        )
        xp.testing.assert_allclose(nstep_rs[0].array, nomask_rs[0].array)

        # Mask only the 1st state: only the 2nd output should be the same.
        def mask0_forward_twice():
            _, rs = rseq(transposed_x[0], None)
            rs = rseq.mask_recurrent_state_at(rs, 0)
            return rseq(transposed_x[1], rs)
        mask0_out, mask0_rs = mask0_forward_twice()
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out.array[0, 1],
                mask0_out.array[0],
            )
        xp.testing.assert_allclose(
            nstep_out.array[1, 1],
            mask0_out.array[1],
        )

        # Mask only the 2nd state: only the 1st output should be the same.
        def mask1_forward_twice():
            _, rs = rseq(transposed_x[0], None)
            rs = rseq.mask_recurrent_state_at(rs, 1)
            return rseq(transposed_x[1], rs)
        mask1_out, mask1_rs = mask1_forward_twice()
        xp.testing.assert_allclose(
            nstep_out.array[0, 1],
            mask1_out.array[0],
        )
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out.array[1, 1],
                mask1_out.array[1],
            )

        # Mask both the 1st and 2nd states: both outputs should differ.
        def mask01_forward_twice():
            _, rs = rseq(transposed_x[0], None)
            rs = rseq.mask_recurrent_state_at(rs, [0, 1])
            return rseq(transposed_x[1], rs)
        mask01_out, mask01_rs = mask01_forward_twice()
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out.array[0, 1],
                mask01_out.array[0],
            )
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out.array[1, 1],
                mask01_out.array[1],
            )

        # Get the per-sample recurrent states, concatenate them, and resume
        # the forward pass.
        def get_and_concat_rs_forward():
            _, rs = rseq(transposed_x[0], None)
            rs0 = rseq.get_recurrent_state_at(rs, 0, unwrap_variable=True)
            rs1 = rseq.get_recurrent_state_at(rs, 1, unwrap_variable=True)
            concat_rs = rseq.concatenate_recurrent_states([rs0, rs1])
            return rseq(transposed_x[1], concat_rs)
        getcon_out, getcon_rs = get_and_concat_rs_forward()
        xp.testing.assert_allclose(getcon_rs[0].array, nomask_rs[0].array)
        xp.testing.assert_allclose(
            nstep_out.array[0, 1], getcon_out.array[0])
        xp.testing.assert_allclose(
            nstep_out.array[1, 1], getcon_out.array[1])