def forward_step(self, e_t, d_t, d_prev, d_score_prev, z_scores_prev, trans, batch):
    """One step of the CRF forward recursion (designed as a theano.scan step fn).

    NOTE(review): this method is redefined verbatim later in the file; the later
    definition shadows this one.

    :param e_t: emission scores at step t; 1D: Batch, 2D: n_y
    :param d_t: gold label ids at step t; 1D: Batch
    :param d_prev: gold label ids at step t-1; 1D: Batch
    :param d_score_prev: accumulated gold-path score; 1D: Batch
    :param z_scores_prev: accumulated log-partition scores; 1D: Batch, 2D: n_y
    :param trans: transition score matrix; 1D: n_y, 2D: n_y
    :param batch: batch size (symbolic int scalar, presumably -- confirm at caller)
    :return: (d_t, d_score_t, z_scores_t) -- d_t is passed through so scan can
             feed it back as d_prev on the next step
    """
    # Extend the gold-path score with the transition score trans[d_t, d_prev]
    # and the emission score of the gold label at step t.
    d_score_t = d_score_prev + trans[d_t, d_prev] + e_t[T.arange(batch), d_t]  # 1D: Batch
    # Broadcast previous scores over the current-label axis and add transitions.
    z_sum = z_scores_prev.dimshuffle(0, "x", 1) + trans  # 1D: Batch, 2D: n_y, 3D: n_y
    # Log-sum-exp over the previous-label axis, then add current emissions.
    z_scores_t = logsumexp(z_sum, axis=2).reshape(e_t.shape) + e_t  # 1D: Batch, 2D: n_y
    return d_t, d_score_t, z_scores_t
def forward_step(self, e_t, d_t, d_prev, d_score_prev, z_scores_prev, trans, batch):
    """Single forward-recursion step of the CRF, driven by theano.scan.

    :param e_t: emission scores at step t; 1D: Batch, 2D: n_y
    :param d_t: gold label ids at step t; 1D: Batch
    :param d_prev: gold label ids at step t-1; 1D: Batch
    :param d_score_prev: accumulated gold-path score; 1D: Batch
    :param z_scores_prev: accumulated log-partition scores; 1D: Batch, 2D: n_y
    :param trans: transition score matrix; 1D: n_y, 2D: n_y
    :return: (d_t, gold score at t, partition scores at t)
    """
    rows = T.arange(batch)
    # Gold path: previous score + transition trans[d_t, d_prev] + emission.
    gold_scores = d_score_prev + trans[d_t, d_prev] + e_t[rows, d_t]  # 1D: Batch
    # Pair every previous label with every current label via broadcasting.
    expanded = z_scores_prev.dimshuffle(0, "x", 1) + trans  # 1D: Batch, 2D: n_y, 3D: n_y
    # Marginalize out the previous label in log space, then add emissions.
    partition_scores = logsumexp(expanded, axis=2).reshape(e_t.shape) + e_t  # 1D: Batch, 2D: n_y
    return d_t, gold_scores, partition_scores
def y_prob(self, h, y, batch):
    """Log-probability of the gold label sequence under the CRF.

    NOTE(review): this method is redefined verbatim later in the file; the later
    definition shadows this one. The original docstring called the return value
    a "gradient of cross entropy", but the code computes gold-path score minus
    log partition, i.e. log p(y | h) per example.

    :param h: emission scores; 1D: n_words, 2D: Batch, 3D: n_y
    :param y: gold label ids; 1D: n_words, 2D: Batch
    :param batch: batch size (symbolic int scalar, presumably -- confirm at caller)
    :return: gold-path score minus log partition; 1D: Batch
    """
    # Gold-path score at the first word: BOS transition + emission of gold label.
    y_score0 = self.BOS[y[0]] + h[0][T.arange(batch), y[0]]  # 1D: Batch
    # Partition initialization: scores for every candidate first label.
    z_score0 = self.BOS + h[0]  # 1D: Batch, 2D: n_y
    # Forward recursion over the remaining words; y[0] is threaded through as
    # the initial d_prev for forward_step.
    [_, y_scores, z_scores], _ = theano.scan(fn=self.forward_step,
                                             sequences=[h[1:], y[1:]],
                                             outputs_info=[y[0], y_score0, z_score0],
                                             non_sequences=[self.W_t, batch])
    y_score = y_scores[-1]  # final accumulated gold-path score; 1D: Batch
    z_score = logsumexp(z_scores[-1], axis=1).flatten()  # log partition; 1D: Batch
    return y_score - z_score
def y_prob(self, h, y, batch):
    """Score the gold label sequence against the CRF partition function.

    :param h: emission scores; 1D: n_words, 2D: Batch, 3D: n_y
    :param y: gold label ids; 1D: n_words, 2D: Batch
    :return: gold-path score minus log partition; 1D: Batch
    """
    first_labels = y[0]
    # Gold-path initialization: BOS transition plus the first gold emission.
    gold_init = self.BOS[first_labels] + h[0][T.arange(batch), first_labels]  # 1D: Batch
    # Partition initialization over every candidate first label.
    partition_init = self.BOS + h[0]  # 1D: Batch, 2D: n_y
    # Forward recursion over the remaining words; first_labels seeds d_prev.
    outputs, _ = theano.scan(fn=self.forward_step,
                             sequences=[h[1:], y[1:]],
                             outputs_info=[first_labels, gold_init, partition_init],
                             non_sequences=[self.W_t, batch])
    gold_final = outputs[1][-1]  # 1D: Batch
    partition_final = logsumexp(outputs[2][-1], axis=1).flatten()  # 1D: Batch
    return gold_final - partition_final