def score(self, ys, state, x):
    """Score a token prefix ys given encoder output x; return next-token log-probs and cache."""
    ys_mask = subsequent_mask(len(ys), device=x.device).unsqueeze(0)
    logp, state = self.forward_one_step(
        ys.unsqueeze(0), ys_mask, x.unsqueeze(0), cache=state
    )
    return logp.squeeze(0), state
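These snippets come from ESPnet-style Transformer decoders and all rely on a `subsequent_mask` helper that is not shown here. A minimal sketch of what it is assumed to do (a lower-triangular causal mask, so step i may only attend to steps <= i):

import torch

def subsequent_mask(size, device="cpu", dtype=torch.bool):
    """Assumed helper: (size, size) causal mask, True where attention is allowed."""
    return torch.tril(torch.ones(size, size, device=device, dtype=dtype))

With this mask, `score` above evaluates the whole prefix in one pass while reusing the per-layer activations stored in `state`.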
Example #2
def score(self, ys, state, x):
    """Score a token prefix ys; caching is disabled for non-"selfattn" attention layers."""
    ys_mask = subsequent_mask(len(ys), device=x.device).unsqueeze(0)
    if self.selfattention_layer_type != "selfattn":
        # TODO(karita): implement cache
        logging.warning(
            f"{self.selfattention_layer_type} does not support cached decoding."
        )
        state = None
    logp, state = self.forward_one_step(
        ys.unsqueeze(0), ys_mask, x.unsqueeze(0), cache=state
    )
    return logp.squeeze(0), state
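For context, a hedged greedy-decoding sketch that drives the `score` interface above; `decoder`, `enc_out`, `sos_id`, and `eos_id` are placeholder names, not part of the original code:

import torch

def greedy_decode(decoder, enc_out, sos_id, eos_id, max_steps=200):
    """Illustrative loop: grow a prefix one token at a time via decoder.score()."""
    state = None
    ys = torch.tensor([sos_id], dtype=torch.long, device=enc_out.device)
    for _ in range(max_steps):
        logp, state = decoder.score(ys, state, enc_out)  # logp: (n_vocab,)
        next_id = int(logp.argmax())                     # greedy pick for illustration
        ys = torch.cat([ys, ys.new_tensor([next_id])])
        if next_id == eos_id:
            break
    return ys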
def forward(self, text, speaker_embedding=None):
    """Autoregressively generate a mel-spectrogram for a single utterance."""
    self.eval()
    x = text
    xs = x.unsqueeze(0)  # add batch dimension: (1, T_text)
    hs, _ = self.encoder(xs, None)
    if self.spk_embed_dim is not None:
        speaker_embeddings = speaker_embedding.unsqueeze(0)
        hs = self._integrate_with_spk_embed(hs, speaker_embeddings)

    # maximum / minimum number of decoder steps, scaled by the reduction factor
    # (the min-length ratio is hard-coded to 0.0 here, so minlen is always 0)
    maxlen = int(hs.size(1) * 10.0 / self.reduction_factor)
    minlen = int(hs.size(1) * 0.0 / self.reduction_factor)

    idx = 0
    ys = hs.new_zeros(1, 1, self.odim)  # start from an all-zero frame
    outs, probs = [], []
    z_cache = self.decoder.init_state(x)
    while True:
        idx += 1

        # run one decoder step with a causal mask over the frames generated so far
        y_masks = subsequent_mask(idx).unsqueeze(0).to(x.device)
        z, z_cache = self.decoder.forward_one_step(ys, y_masks, hs, cache=z_cache)
        outs += [self.feat_out(z).view(self.reduction_factor, self.odim)]
        probs += [torch.sigmoid(self.prob_out(z))[0]]  # stop-token probabilities

        # feed the last generated frame back in as the next decoder input
        ys = torch.cat((ys, outs[-1][-1].view(1, 1, self.odim)), dim=1)

        # accumulate source-attention weights of the newest frame (e.g. for plotting)
        att_ws_ = []
        for name, m in self.named_modules():
            if isinstance(m, MultiHeadedAttention) and "src" in name:
                att_ws_ += [m.attn[0, :, -1].unsqueeze(1)]
        if idx == 1:
            att_ws = att_ws_
        else:
            att_ws = [
                torch.cat([att_w, att_w_], dim=1)
                for att_w, att_w_ in zip(att_ws, att_ws_)
            ]

        # stop once any stop-token probability exceeds 0.5 or maxlen is reached
        if int(sum(probs[-1] >= 0.5)) > 0 or idx >= maxlen:
            if idx < minlen:
                continue
            outs = torch.cat(outs, dim=0).unsqueeze(0).transpose(1, 2)  # (1, odim, T_out)
            if self.postnet is not None:
                outs = outs + self.postnet(outs)  # residual postnet refinement
            outs = outs.transpose(2, 1).squeeze(0)  # back to (T_out, odim)
            break
    return outs
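A hedged usage sketch for the inference `forward` above; `tts_model` and `text_ids` are placeholder names:

import torch

def synthesize(tts_model, text_ids, speaker_embedding=None):
    """Illustrative wrapper: forward() above already switches the model to eval mode."""
    with torch.no_grad():
        mel = tts_model.forward(text_ids, speaker_embedding=speaker_embedding)
    return mel  # (T_out, odim) mel-spectrogram, after the optional postnet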
def batch_score(self, ys, states, xs):
    """Score new token batch (required).

    Args:
        ys (torch.Tensor): torch.int64 prefix tokens (n_batch, ylen).
        states (List[Any]): Scorer states for prefix tokens.
        xs (torch.Tensor):
            The encoder feature that generates ys (n_batch, xlen, n_feat).

    Returns:
        tuple[torch.Tensor, List[Any]]: Tuple of
            batchified scores for the next token with shape `(n_batch, n_vocab)`
            and the next state list for ys.

    """
    # merge states
    n_batch = len(ys)
    n_layers = len(self.decoders)
    if states[0] is None:
        batch_state = None
    else:
        # transpose state of [batch, layer] into [layer, batch]
        batch_state = [
            torch.stack([states[b][i] for b in range(n_batch)])
            for i in range(n_layers)
        ]

    # batch decoding
    ys_mask = subsequent_mask(ys.size(-1), device=xs.device).unsqueeze(0)
    logp, states = self.forward_one_step(ys, ys_mask, xs, cache=batch_state)

    # transpose state of [layer, batch] into [batch, layer]
    state_list = [
        [states[i][b] for i in range(n_layers)] for b in range(n_batch)
    ]
    return logp, state_list
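A hedged sketch of how a batched beam search might call `batch_score`; the per-hypothesis states it returns (indexed [batch][layer]) can be passed straight back in on the next step, with `None` entries for fresh hypotheses. `decoder`, `ys_batch`, and `xs_batch` are placeholder names:

def step_batch(decoder, ys_batch, xs_batch, prev_states=None):
    """Illustrative single decoding step over a batch of hypotheses."""
    if prev_states is None:
        prev_states = [None] * ys_batch.size(0)  # one state slot per hypothesis
    logp, next_states = decoder.batch_score(ys_batch, prev_states, xs_batch)
    return logp, next_states  # logp: (n_batch, n_vocab); next_states: [batch][layer]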
def _target_mask(self, olens):
    """Combine the non-padding mask with a causal mask: (B, T_out, T_out)."""
    y_masks = make_non_pad_mask(olens).to(olens.device)
    s_masks = subsequent_mask(y_masks.size(-1), device=y_masks.device).unsqueeze(0)
    return y_masks.unsqueeze(-2) & s_masks
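To illustrate what `_target_mask` produces, a self-contained toy example with a stand-in for `make_non_pad_mask` (the real helper is not shown in these snippets):

import torch

def make_non_pad_mask(lengths):
    """Toy stand-in: True for real frames, False for padding; shape (B, max_len)."""
    ar = torch.arange(int(max(lengths))).unsqueeze(0)  # (1, max_len)
    return ar < lengths.unsqueeze(1)                   # (B, max_len)

olens = torch.tensor([3, 2])
y_masks = make_non_pad_mask(olens)                                     # (2, 3)
s_masks = torch.tril(torch.ones(3, 3, dtype=torch.bool)).unsqueeze(0)  # (1, 3, 3)
target_mask = y_masks.unsqueeze(-2) & s_masks                          # (2, 3, 3)
# Row t of each (3, 3) slice allows attention to frames <= t that are not padding.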