def train_forward(self, x: Variable, h=None):
    """
    Run one step of the RNN cell.

    :param x: input for this time step, shape: (batch_size, input_size),
              e.g. one slice of a (batch_size, sequence_length, vocab_size) batch
              or its embedding
    :param h: hidden state from the previous step, shape: (batch_size, hidden_units);
              None on the first step
    :return: the output o and the new hidden state h
    """
    if self.initialize:
        # Lazily create the input-to-hidden weights once the input width is known
        self.u = Variable(np.random.normal(
            0, 1, (x.shape[1], self.hidden_units)), trainable=True)
        self.initialize = False
    if h is None:
        # For the first RNN cell there is no previous hidden state, so we initialize one
        h = Variable(
            np.random.normal(0, 1, (x.shape[0], self.hidden_units)))
    xu = x.dot(self.u)
    hw = h.dot(self.w)
    self.a = xu + hw + self.b              # a_t = x_t U + h_{t-1} W + b
    self.h = self.activation(self.a)       # h_t = activation(a_t)
    self.o = self.h.dot(self.v) + self.c   # o_t = h_t V + c
    return self.o, self.h
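To make the recurrence itself easier to see without the `Variable` machinery, here is a minimal plain-NumPy sketch of the same single step. It assumes a tanh activation and hypothetical shapes (x_t of shape (batch, embed), h_prev of shape (batch, hidden)); the name rnn_step and the concrete sizes are illustrative, not part of the class above.

# Standalone sketch: one RNN step, h_t = tanh(x_t U + h_{t-1} W + b), o_t = h_t V + c
import numpy as np

def rnn_step(x_t, h_prev, u, w, v, b, c):
    a = x_t @ u + h_prev @ w + b   # pre-activation a_t
    h = np.tanh(a)                 # new hidden state h_t (tanh assumed)
    o = h @ v + c                  # output o_t
    return o, h

batch, embed, hidden, vocab = 4, 8, 16, 10
rng = np.random.default_rng(0)
x_t = rng.normal(size=(batch, embed))
h_prev = np.zeros((batch, hidden))
u = rng.normal(size=(embed, hidden))
w = rng.normal(size=(hidden, hidden))
v = rng.normal(size=(hidden, vocab))
b = np.zeros(hidden)
c = np.zeros(vocab)
o, h = rnn_step(x_t, h_prev, u, w, v, b, c)
print(o.shape, h.shape)  # (4, 10) (4, 16)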
def forward(self, x: Variable):
    """
    :param x: one-hot input of shape (..., vocab_size). The leading dimensions
              don't matter; only the last (one-hot) dimension is used to look up
              the corresponding embedding.
    :return: the embedded representation, shape (..., embed_size)
    """
    self.vocab_size = x.shape[-1]
    if self.initialize:
        # Lazily create the embedding matrix once the vocabulary size is known
        self.mapping = Variable(np.random.normal(0, 1, (self.vocab_size, self.embed_size)),
                                trainable=True)
        self.initialize = False
    # Find the corresponding word representation: the one-hot dot product
    # selects the matching row of the embedding matrix
    embedded_word = x.dot(self.mapping)  # n x embed_size
    return embedded_word
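The reason the dot product works as a lookup is that a one-hot row zeroes out every row of the embedding matrix except the one it selects. Below is a minimal NumPy sketch of that equivalence; the names (mapping, token_ids) and sizes are illustrative assumptions, not attributes of the layer above.

# Standalone sketch: one-hot @ embedding matrix == row indexing
import numpy as np

vocab_size, embed_size = 10, 4
rng = np.random.default_rng(0)
mapping = rng.normal(size=(vocab_size, embed_size))  # stand-in embedding matrix

token_ids = np.array([2, 7, 7])
one_hot = np.eye(vocab_size)[token_ids]   # (3, vocab_size)

via_dot = one_hot @ mapping                # what forward() computes
via_index = mapping[token_ids]             # equivalent direct row lookup
print(np.allclose(via_dot, via_index))     # True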