def initialize_blocks(self): r""" Helper function to initialize blocks. """ for i in range(self._hparams.num_blocks): mh_attn = MultiheadRPRAttention( self._input_size, self._hparams.multihead_attention, stores_relative_position=bool(i == 0) ) self.self_attns.append(mh_attn) self.self_attn_layer_norm.append( T5LayerNorm(self._input_size, eps=self._hparams.eps)) if self._hparams.dim != mh_attn.hparams.output_dim: raise ValueError( 'The "dim" in the hparams of ' '"multihead_attention" should be equal to the ' '"dim" of T5Encoder') pw_net = FeedForwardNetwork( hparams=self._hparams['poswise_feedforward']) final_dim = pw_net.hparams.layers[-1]['kwargs']['out_features'] if self._hparams.dim != final_dim: raise ValueError( 'The output dimenstion of ' '"poswise_feedforward" should be equal ' 'to the "dim" of T5Encoder.') self.poswise_networks.append(pw_net) self.poswise_layer_norm.append( T5LayerNorm(self._input_size, eps=self._hparams.eps))
def initialize_blocks(self): r"""Helper function to initialize blocks. """ for i in range(self._hparams.num_blocks): attn_module = MultiheadRPRAttention( self._input_size, self._hparams.multihead_attention, stores_relative_position=bool(i == 0)) if self._hparams.dim != attn_module.output_size: raise ValueError("The output dimension of " "MultiheadRPRAttention should be equal " "to the dim of T5Decoder") self.self_attns.append(attn_module) self.self_attn_layer_norm.append( T5LayerNorm(self._input_size, eps=self._hparams.eps)) attn_module = MultiheadRPRAttention( self._input_size, self._hparams.multihead_attention, stores_relative_position=bool(i == 0)) if self._hparams.dim != attn_module.output_size: raise ValueError("The output dimension of " "MultiheadRPRAttention should be equal " "to the dim of T5Decoder") self.enc_dec_attns.append(attn_module) self.end_dec_attn_layer_norm.append( T5LayerNorm(self._input_size, eps=self._hparams.eps)) poswise_network = FeedForwardNetwork( hparams=self._hparams.poswise_feedforward) if (poswise_network.hparams.layers[-1]['kwargs']['out_features'] != self._hparams.dim): raise ValueError("The output dimension of " "FeedForwardNetwork should be equal " "to the dim of T5Decoder") self.poswise_networks.append(poswise_network) self.poswise_layer_norm.append( T5LayerNorm(self._input_size, eps=self._hparams.eps))
def __init__(self,
             token_embedder: Optional[TokenEmbedder] = None,
             token_pos_embedder: Optional[TokenPosEmbedder] = None,
             vocab_size: Optional[int] = None,
             output_layer: Optional[Union[nn.Module, torch.Tensor]] = None,
             hparams=None):
    super().__init__(token_embedder,
                     token_pos_embedder,
                     vocab_size=vocab_size,
                     output_layer=output_layer,
                     hparams=hparams)

    self.final_layer_norm = T5LayerNorm(
        self._input_size,  # type: ignore
        eps=self._hparams.eps)
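# Example: a hypothetical instantiation, assuming ``token_embedder`` only
# needs to map token ids to embeddings (an ``nn.Embedding`` satisfies
# that contract); the vocabulary size and embedding dim are placeholders,
# not values taken from this code.
import torch.nn as nn

decoder = T5Decoder(
    token_embedder=nn.Embedding(num_embeddings=32128, embedding_dim=512),
    vocab_size=32128,
)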
def __init__(self, hparams=None):
    super().__init__(hparams=hparams)

    self.final_layer_norm = T5LayerNorm(self._input_size,
                                        eps=self._hparams.eps)
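# Example: a usage sketch assuming a TransformerEncoder-style
# ``forward(inputs, sequence_length)`` contract, which this snippet does
# not itself show; shapes and lengths are placeholders.
import torch

encoder = T5Encoder()  # initialize_blocks() builds the per-block sub-layers
inputs = torch.randn(2, 16, encoder.hparams.dim)  # [batch, time, dim]
outputs = encoder(inputs, sequence_length=torch.tensor([16, 12]))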