def output_hidden_state(C, o):
    '''
        Compute the output hidden state of an LSTM step: H = o * tanh(C).
        Input:
            C: the current cell state of the LSTM, a float torch Tensor of shape n by h.
                Here n is the batch size, h is the number of cell units.
            o: the output gate values of the batch of training instances, a float matrix of shape n by h.
                Here h is the number of hidden units.
        Output:
            H: the output hidden state values of the batch of training instances, a float matrix of shape n by h.
                Here h is the number of hidden units.
    '''
    #########################################
    ## INSERT YOUR CODE HERE
    # Squash the cell state with tanh, then gate it element-wise with o.
    H = o * tanh(C)
    #########################################
    return H
def output_hidden_state(C, o):
    '''
        Output the hidden state of an LSTM step.
        Input:
            C: the current cell state of the LSTM, a float torch Tensor of shape n by h.
                Here n is the batch size, h is the number of cell units.
            o: the output gate values of the batch of training instances, a float matrix of shape n by h.
                Here h is the number of hidden units.
        Output:
            H: the output hidden state values of the batch of training instances, a float matrix of shape n by h.
                Here h is the number of hidden units.
    '''
    # tanh bounds the cell state to (-1, 1) before it is gated.
    squashed = tanh(C)
    # Element-wise product with the output gate selects what is exposed as H.
    H = th.mul(squashed, o)
    return H
def gates(self, x, H):
    '''
        Given a batch of training instances (with one time step), compute the gating functions:
        forget gate f, input gate i, output gate o; and the candidate cell state.
        Note: W_f is a matrix holding the weights for both inputs and hidden states.
              Here we assume the first p rows of W_f correspond to input weights, and the last h rows
              of W_f correspond to the weights of hidden states. The same layout is used for
              W_i, W_o and W_c.
        Input:
            x: a batch of training instances, a float torch Tensor of shape n by p.
                Here n is the batch size, p is the number of features.
            H: the hidden state of the LSTM, a float torch Tensor of shape n by h.
                Here h is the number of hidden units.
        Output:
            f: the forget gate values of the batch of training instances, a float matrix of shape n by h.
            i: the input gate values of the batch of training instances, a float matrix of shape n by h.
            o: the output gate values of the batch of training instances, a float matrix of shape n by h.
            C_c: the candidate cell state values of the batch of training instances, a float matrix of shape n by h.
    '''
    # All four gates read the same [x, H] input, so concatenate once along
    # the feature dimension instead of rebuilding it for every gate.
    xH = th.cat([x, H], 1)
    f = th.sigmoid(th.mm(xH, self.W_f) + self.b_f)
    i = th.sigmoid(th.mm(xH, self.W_i) + self.b_i)
    o = th.sigmoid(th.mm(xH, self.W_o) + self.b_o)
    # The candidate cell state uses tanh rather than sigmoid.
    C_c = tanh(th.mm(xH, self.W_c) + self.b_c)
    return f, i, o, C_c
def gates(self, x, H):
    '''
        Given a batch of training instances (with one time step), compute the gating functions:
        forget gate f, input gate i, output gate o; and the candidate cell state.
        Note: W_f is a matrix holding the weights for both inputs and hidden states.
              Here we assume the first p rows of W_f correspond to input weights, and the last h rows
              of W_f correspond to the weights of hidden states. The same layout is used for
              W_i, W_o and W_c.
        Input:
            x: a batch of training instances, a float torch Tensor of shape n by p.
                Here n is the batch size, p is the number of features.
            H: the hidden state of the LSTM, a float torch Tensor of shape n by h.
                Here h is the number of hidden units.
        Output:
            f: the forget gate values of the batch of training instances, a float matrix of shape n by h.
            i: the input gate values of the batch of training instances, a float matrix of shape n by h.
            o: the output gate values of the batch of training instances, a float matrix of shape n by h.
            C_c: the candidate cell state values of the batch of training instances, a float matrix of shape n by h.
    '''
    #########################################
    ## INSERT YOUR CODE HERE
    # Every gate consumes the same [x, H] matrix; build it a single time.
    xH = th.cat((x, H), dim=1)

    # Biases are added by broadcasting over the batch dimension, so they do
    # not need to be expanded to n by h explicitly.
    f = th.sigmoid(th.mm(xH, self.W_f) + self.b_f)
    i = th.sigmoid(th.mm(xH, self.W_i) + self.b_i)
    o = th.sigmoid(th.mm(xH, self.W_o) + self.b_o)

    # The candidate cell state is squashed with tanh instead of sigmoid.
    C_c = tanh(th.mm(xH, self.W_c) + self.b_c)
    #########################################
    return f, i, o, C_c