# Report relative errors for the single-step LSTM backward pass
# (numeric vs. analytic gradients computed in an earlier cell).
print('dx error: ', rel_error(dx_num, dx))
print('dh error: ', rel_error(dh_num, dh))
print('dc error: ', rel_error(dc_num, dc))
print('dWx error: ', rel_error(dWx_num, dWx))
print('dWh error: ', rel_error(dWh_num, dWh))
print('db error: ', rel_error(db_num, db))

# Full-sequence LSTM forward check: small deterministic inputs built
# with linspace so the expected hidden states are reproducible.
N, D, H, T = 2, 5, 4, 3
x = np.linspace(-0.4, 0.6, num=N*T*D).reshape(N, T, D)
h0 = np.linspace(-0.4, 0.8, num=N*H).reshape(N, H)
# LSTM weights pack the four gates side by side, hence the 4*H columns.
Wx = np.linspace(-0.2, 0.9, num=4*D*H).reshape(D, 4 * H)
Wh = np.linspace(-0.3, 0.6, num=4*H*H).reshape(H, 4 * H)
b = np.linspace(0.2, 0.7, num=4*H)

h, cache = lstm_forward(x, h0, Wx, Wh, b)

# Reference hidden states for the inputs above, shape (N, T, H).
expected_h = np.asarray([
 [[ 0.01764008, 0.01823233, 0.01882671, 0.0194232 ],
  [ 0.11287491, 0.12146228, 0.13018446, 0.13902939],
  [ 0.31358768, 0.33338627, 0.35304453, 0.37250975]],
 [[ 0.45767879, 0.4761092, 0.4936887, 0.51041945],
  [ 0.6704845, 0.69350089, 0.71486014, 0.7346449 ],
  [ 0.81733511, 0.83677871, 0.85403753, 0.86935314]]])
print('h error: ', rel_error(expected_h, h))

# Imports and seed for the lstm_backward check that follows in the next cell.
from cs231n.rnn_layers import lstm_forward, lstm_backward
np.random.seed(231)
def loss(self, features, captions):
    """
    Compute training-time loss for the captioning RNN/LSTM.

    We input image features and ground-truth captions for those images, and
    use an RNN (or LSTM) to compute loss and gradients on all parameters.

    Inputs:
    - features: Input image features, of shape (N, D)
    - captions: Ground-truth captions; an integer array of shape (N, T)
      where each element is in the range 0 <= y[i, t] < V

    Returns a tuple of:
    - loss: Scalar loss
    - grads: Dictionary of gradients parallel to self.params

    Raises:
    - ValueError: if self.cell_type is neither 'rnn' nor 'lstm'.
    """
    # Cut captions into two pieces: captions_in has everything but the last
    # word and will be input to the RNN; captions_out has everything but the
    # first word and is what we expect the RNN to generate. These are offset
    # by one relative to each other because the RNN should produce word (t+1)
    # after receiving word t. The first element of captions_in will be the
    # START token, and the first element of captions_out will be the first
    # real word.
    captions_in = captions[:, :-1]
    captions_out = captions[:, 1:]

    # Mask out positions where the target word is <NULL> (padding).
    mask = (captions_out != self._null)

    # Weight and bias for the affine transform from image features to the
    # initial hidden state.
    W_proj, b_proj = self.params['W_proj'], self.params['b_proj']

    # Word embedding matrix.
    W_embed = self.params['W_embed']

    # Input-to-hidden, hidden-to-hidden, and biases for the RNN.
    Wx, Wh, b = self.params['Wx'], self.params['Wh'], self.params['b']

    # Weight and bias for the hidden-to-vocab transformation.
    W_vocab, b_vocab = self.params['W_vocab'], self.params['b_vocab']

    loss, grads = 0.0, {}

    # Fail loudly on an unsupported cell type instead of silently returning
    # zero loss and empty gradients.
    if self.cell_type not in ('rnn', 'lstm'):
        raise ValueError('Invalid cell_type "%s"' % self.cell_type)

    # Forward pass.
    # (1) Affine transform: image features -> initial hidden state, (N, H).
    h0 = np.dot(features, W_proj) + b_proj
    # (2) Word embedding: caption indices -> vectors, (N, T, W).
    word_vec, word_vec_cache = word_embedding_forward(captions_in, W_embed)
    # (3) Recurrent net over the sequence -> hidden states, (N, T, H).
    # Only this step differs between the two cell types; everything else
    # below is shared rather than duplicated per branch.
    if self.cell_type == 'rnn':
        h, h_cache = rnn_forward(x=word_vec, h0=h0, Wx=Wx, Wh=Wh, b=b)
    else:
        h, h_cache = lstm_forward(x=word_vec, h0=h0, Wx=Wx, Wh=Wh, b=b)
    # (4) Temporal affine: hidden states -> vocabulary scores, (N, T, V).
    out, out_cache = temporal_affine_forward(x=h, w=W_vocab, b=b_vocab)
    # (5) Temporal softmax loss over captions_out, ignoring <NULL> targets.
    loss, dx = temporal_softmax_loss(x=out, y=captions_out, mask=mask,
                                     verbose=False)

    # Backward pass, mirroring the forward steps in reverse.
    dh, grads['W_vocab'], grads['b_vocab'] = \
        temporal_affine_backward(dx, out_cache)
    if self.cell_type == 'rnn':
        dcaption_in, dh0, grads['Wx'], grads['Wh'], grads['b'] = \
            rnn_backward(dh, h_cache)
    else:
        dcaption_in, dh0, grads['Wx'], grads['Wh'], grads['b'] = \
            lstm_backward(dh, h_cache)
    grads['W_embed'] = word_embedding_backward(dcaption_in, word_vec_cache)
    # Gradient of the initial affine projection, by hand since the forward
    # step (1) was written inline rather than via affine_forward.
    grads['W_proj'] = np.dot(features.T, dh0)
    grads['b_proj'] = np.sum(dh0, axis=0)

    return loss, grads