Example #1
    def test(self, session, input_batch, pose_batch, initial_states=None):
        '''Get network predictions for some input sequence and compute average error.

        Parameters
        ----------
        session :   tf.Session
                    Session to run ops in
        input_batch  :  np.ndarray
                        Array of shape ``(batch_size, sequence_length, h, w, 6)`` where two
                        consecutive RGB images are stacked together.
        pose_batch  :   np.ndarray
                        Array of shape ``(batch_size, sequence_length, 6)`` with poses
        initial_states  :   np.ndarray
                            Array of shape ``(2, 2, batch_size, memory_size)``

        Returns
        -------
        tuple(np.ndarray)
            Predictions for t and r, the loss, and the final ``rnn_state``
        '''
        batch_size = input_batch.shape[0]

        if initial_states is None:
            initial_states = tensor_from_lstm_tuple(
                self.get_zero_state(session, batch_size))

        fetches = [*self.predictions, self.loss, self.rnn_state]
        y_t, y_r, loss, states = session.run(fetches,
                                             feed_dict={
                                                 self.batch_size: batch_size,
                                                 self.target_poses: pose_batch,
                                                 self.input_images:
                                                 input_batch,
                                                 self.lstm_states:
                                                 initial_states
                                             })
        return y_t, y_r, loss, states
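
For reference, a minimal usage sketch of ``test`` (hypothetical names: ``net`` is an
instance of this class, ``images`` and ``poses`` are numpy arrays of the documented shapes):

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        # no initial state is passed, so the zero state is used internally
        y_t, y_r, loss, states = net.test(session, images, poses)
        print('average loss on batch:', loss)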
Example #2
    def train(self,
              session,
              input_batch,
              pose_batch,
              initial_states=None,
              return_prediction=False):
        '''Train the network.

        Parameters
        ----------
        session :   tf.Session
                    Session to execute op in
        input_batch  :  np.ndarray
                        Array of shape ``(batch_size, sequence_length, h, w, 6)`` where two
                        consecutive RGB images are stacked together.
        pose_batch  :   np.ndarray
                        Array of shape ``(batch_size, sequence_length, 6)`` with poses
        initial_states   :  np.ndarray
                            Array of shape ``(2, 2, batch_size, memory_size)``

        Returns
        -------
        tuple(np.ndarray)
            Outputs of the ``train_step``, ``loss``, and ``rnn_state`` operations, with the
            predictions for t and r prepended if ``return_prediction`` is set
        '''
        batch_size = input_batch.shape[0]

        if initial_states is None:
            zero_state = self.get_zero_state(session, batch_size)
            initial_states = tensor_from_lstm_tuple(zero_state)

        if return_prediction:
            fetches = [
                self.y_t, self.y_r, self.train_step, self.loss, self.rnn_state
            ]
        else:
            fetches = [self.train_step, self.loss, self.rnn_state]

        return session.run(fetches,
                           feed_dict={
                               self.batch_size: batch_size,
                               self.input_images: input_batch,
                               self.target_poses: pose_batch,
                               self.lstm_states: initial_states
                           })
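
Since ``train`` returns the final ``rnn_state`` and accepts it again via ``initial_states``,
the LSTM memory can be carried across consecutive subsequences of one video. A hedged
sketch (``net`` and ``subsequences`` are hypothetical stand-ins):

    states = None
    for input_batch, pose_batch in subsequences:
        # feed the previous final state so the recurrent memory carries over
        _, loss, states = net.train(session, input_batch, pose_batch,
                                    initial_states=states)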
Example #3
    def __init__(self,
                 image_shape,
                 memory_size,
                 sequence_length,
                 optimizer_spec=None,
                 resize_images=False,
                 use_dropout=True,
                 use_flownet=False):
        '''
        Parameters
        ----------
        image_shape :   tuple
        memory_size :   int
                        LSTM state size (identical for both layers)
        sequence_length :   int
                            Length of the video stream
        optimizer_spec  :   OptimizerSpec
                            Specification of the optimizer
        resize_images   :   bool
                            Resize images to a multiple of 64
        use_dropout :   bool
                        Apply dropout to the LSTM cell outputs
        use_flownet :   bool
                        Name CNN vars according to the FlowNet naming scheme. You *must* call
                        :py:meth:`load_flownet` before pushing data through the graph.
        '''
        if not optimizer_spec:
            optimizer_spec = OptimizerSpec(kind='Adagrad', learning_rate=0.001)
        optimizer = optimizer_spec.create()
        self.use_dropout = use_dropout
        self.use_flownet = use_flownet
        self.sequence_length = sequence_length
        ############################################################################################
        #                                          Inputs                                          #
        ############################################################################################
        with tf.variable_scope('inputs'):
            h, w, c = image_shape
            self.input_images = tf.placeholder(
                tf.float32,
                shape=[None, sequence_length, h, w, 2 * c],
                name='imgs')
            if resize_images:
                self.input_images = resize_to_multiple(self.input_images, 64)

            self.target_poses = tf.placeholder(
                tf.float32,
                shape=[None, sequence_length, 6],
                name='target_poses')
            # this placeholder is used for feeding both the cell and hidden states of both lstm
            # cells. The cell state comes before the hidden state
            N_lstm = 2
            self.lstm_states = tf.placeholder(tf.float32,
                                              shape=(N_lstm, 2, None,
                                                     memory_size),
                                              name='LSTM_states')
            self.batch_size = tf.placeholder(tf.int32,
                                             shape=[],
                                             name='batch_size')

        ############################################################################################
        #                                       Convolutions                                       #
        ############################################################################################
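        # the kernel sizes, strides and channel counts below appear to mirror the
        # FlowNetS encoder (conv1 through conv6), in line with the use_flownet option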
        ksizes = [7, 5, 5, 3, 3, 3, 3, 3, 3]
        strides = [2, 2, 2, 1, 2, 1, 2, 1, 2]
        n_channels = [64, 128, 256, 256, 512, 512, 512, 512, 1024]

        self.cnn_activations = []
        # we call cnn() in a loop, but the variables will be reused after first creation
        for idx in range(sequence_length):
            stacked_image = self.input_images[:, idx, :]
            cnn_activation = self.cnn(stacked_image,
                                      ksizes,
                                      strides,
                                      n_channels,
                                      reuse=tf.AUTO_REUSE)
            self.cnn_activations.append(cnn_activation)

        # compute number of activations for flattening the conv output
        def num_activations(conv):
            return np.prod(conv.shape[1:].as_list())

        # flatten cnn output for each batch element
        rnn_inputs = [
            tf.reshape(
                conv, [self.batch_size, num_activations(conv)])
            for conv in self.cnn_activations
        ]

        ############################################################################################
        #                                           LSTM                                           #
        ############################################################################################
        with tf.variable_scope('rnn'):
            # create all recurrent layers as specified in the paper
            lstm0 = LSTMCell(memory_size, state_is_tuple=True)
            lstm1 = LSTMCell(memory_size, state_is_tuple=True)
            if self.use_dropout:
                lstm_keep_probs = [0.7, 0.8]
                lstm0 = DropoutWrapper(lstm0,
                                       output_keep_prob=lstm_keep_probs[0])
                lstm1 = DropoutWrapper(lstm1,
                                       output_keep_prob=lstm_keep_probs[1])
            self.rnn = MultiRNNCell([lstm0, lstm1])
            self.zero_state = self.rnn.zero_state(self.batch_size, tf.float32)

            # first decompose state input into the two layers
            states0 = self.lstm_states[0, ...]
            states1 = self.lstm_states[1, ...]

            # then retrieve two memory_size-sized tensors from each state item
            states0_list = tf.unstack(states0, num=2)
            cell_state0 = states0_list[0]
            hidden_state0 = states0_list[1]

            states1_list = tf.unstack(states1, num=2)
            cell_state1 = states1_list[0]
            hidden_state1 = states1_list[1]

            # finally, create the state tuples (cell state first, hidden state second)
            state0 = LSTMStateTuple(c=cell_state0, h=hidden_state0)
            state1 = LSTMStateTuple(c=cell_state1, h=hidden_state1)

            sequence_lengths = tf.ones(
                (self.batch_size, ), dtype=tf.int32) * sequence_length
            rnn_outputs, rnn_state = static_rnn(
                self.rnn,
                rnn_inputs,
                dtype=tf.float32,
                initial_state=(state0, state1),
                sequence_length=sequence_lengths)
            rnn_outputs = tf.reshape(
                tf.concat(rnn_outputs,
                          1), [self.batch_size, sequence_length, memory_size])
            self.rnn_state = tensor_from_lstm_tuple(rnn_state)

        ############################################################################################
        #                                       Output layer                                       #
        ############################################################################################
        with tf.variable_scope('feedforward'):
            n_rnn_output = memory_size  # fan-in of the output layer
            kernel_initializer = tf.random_normal_initializer(
                stddev=np.sqrt(2 / n_rnn_output))
            # predictions
            # I know we shouldn't use tf.layers but since dense + conv are easy to implement by
            # oneself we decided to remove this possible source of errors and focus on the many
            # others
            y = tf.layers.dense(rnn_outputs,
                                6,
                                kernel_initializer=kernel_initializer)
            # decompose into translational and rotational component
            self.y_t, self.y_r = tf.split(y, 2, axis=2)
            self.x_t, self.x_r = tf.split(self.target_poses, 2, axis=2)
            self.predictions = (self.y_t, self.y_r)

        self.loss = self.loss_function((self.x_t, self.x_r),
                                       (self.y_t, self.y_r))
        with tf.variable_scope('optimizer'):
            self.train_step = optimizer.minimize(self.loss)
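
``tensor_from_lstm_tuple`` is referenced above but not shown. A plausible sketch, assuming
it packs the per-layer ``LSTMStateTuple``s into the ``(n_layers, 2, batch_size, memory_size)``
layout of the ``lstm_states`` placeholder (cell state first, as the comment above notes):

    def tensor_from_lstm_tuple(state_tuples):
        # hedged sketch: handles both numpy states (as returned by session.run)
        # and symbolic states (as needed for self.rnn_state above)
        stack = tf.stack if isinstance(state_tuples[0].c, tf.Tensor) else np.stack
        # cell state before hidden state, matching the placeholder layout
        return stack([stack([s.c, s.h]) for s in state_tuples])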