Example #1
def forward_step(self, e_t, d_t, d_prev, d_score_prev, z_scores_prev, trans, batch):
    """One step of the CRF forward recurrence over time steps.

    :param e_t: emission scores at step t; 1D: Batch, 2D: n_y
    :param d_t: gold tag at step t; 1D: Batch
    :param d_prev: gold tag at step t-1; 1D: Batch
    :param d_score_prev: score of the gold path up to t-1; 1D: Batch
    :param z_scores_prev: forward (partition) scores up to t-1; 1D: Batch, 2D: n_y
    :param trans: transition scores; 1D: n_y, 2D: n_y
    :param batch: batch size (scalar)
    """
    # Extend the gold-path score with the transition and emission at step t.
    d_score_t = d_score_prev + trans[d_t, d_prev] + e_t[T.arange(batch), d_t]  # 1D: Batch
    # Broadcast the previous forward scores against every possible previous tag.
    z_sum = z_scores_prev.dimshuffle(0, 'x', 1) + trans  # 1D: Batch, 2D: n_y, 3D: n_y
    # Log-sum-exp over previous tags, then add the emissions at step t.
    z_scores_t = logsumexp(z_sum, axis=2).reshape(e_t.shape) + e_t  # 1D: Batch, 2D: n_y
    return d_t, d_score_t, z_scores_t
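
Both snippets call a logsumexp helper that is not part of Theano's tensor API and is not shown here. A minimal numerically stable sketch that matches how the call sites use it (a keepdims result that the callers undo with .reshape(...) or .flatten()) might look like this; the implementation in the original project may differ:

import theano.tensor as T

def logsumexp(x, axis):
    # Numerically stable log(sum(exp(x))) along `axis`: subtract the
    # per-slice maximum before exponentiating to avoid overflow.
    # keepdims=True leaves the reduced axis as size 1, which the callers
    # above remove with .reshape(...) or .flatten().
    x_max = T.max(x, axis=axis, keepdims=True)
    return T.log(T.sum(T.exp(x - x_max), axis=axis, keepdims=True)) + x_max
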
def y_prob(self, h, y, batch):
    """
    :param h: emission scores; 1D: n_words, 2D: Batch, 3D: n_y
    :param y: gold tag sequences; 1D: n_words, 2D: Batch
    :param batch: batch size (scalar)
    :return: log probability of the gold paths, y_score - z_score; 1D: Batch
    """
    # Initial scores at the first word: BOS holds the beginning-of-sequence
    # transition score for each tag.
    y_score0 = self.BOS[y[0]] + h[0][T.arange(batch), y[0]]  # 1D: Batch
    z_score0 = self.BOS + h[0]  # 1D: Batch, 2D: n_y

    # Run the forward recurrence over the remaining words; the first output
    # (d_t) is fed back in as d_prev on the next step via outputs_info.
    [_, y_scores, z_scores], _ = theano.scan(
        fn=self.forward_step,
        sequences=[h[1:], y[1:]],
        outputs_info=[y[0], y_score0, z_score0],
        non_sequences=[self.W_t, batch],
    )

    # Gold-path score and log partition function at the last word.
    y_score = y_scores[-1]
    z_score = logsumexp(z_scores[-1], axis=1).flatten()

    return y_score - z_score
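
For context, a sketch of how y_prob would typically be wired into a training cost: the returned per-example log-likelihoods are negated and averaged to form a CRF negative log-likelihood loss. The surrounding names here (crf, params, the SGD step) are placeholders, not from the original code:

import theano
import theano.tensor as T

h = T.ftensor3('h')   # 1D: n_words, 2D: Batch, 3D: n_y (emission scores)
y = T.imatrix('y')    # 1D: n_words, 2D: Batch (gold tags)
batch = y.shape[1]

log_likelihood = crf.y_prob(h, y, batch)    # crf: an instance of the class above
cost = -T.mean(log_likelihood)              # CRF negative log-likelihood
grads = theano.grad(cost, wrt=crf.params)   # assumes the class keeps a `params` list
train = theano.function(
    inputs=[h, y],
    outputs=cost,
    updates=[(p, p - 0.01 * g) for p, g in zip(crf.params, grads)],  # plain SGD
)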