def __init__(self,
             config,
             output_attentions=False,
             keep_multihead_output=False):
    super(TransformerEncoder, self).__init__()
    self.output_attentions = output_attentions
    self.pre_layer_norm = config.pre_layer_norm
    layer = TransformerLayer(config,
                             output_attentions=output_attentions,
                             keep_multihead_output=keep_multihead_output)
    if config.share_layer:
        # Reuse the same TransformerLayer instance at every depth, so all
        # layers share (tie) one set of weights.
        self.layer = nn.ModuleList(
            [layer for _ in range(config.num_hidden_layers)])
    else:
        # Give each depth its own independent parameters via a deep copy.
        self.layer = nn.ModuleList([
            copy.deepcopy(layer) for _ in range(config.num_hidden_layers)
        ])
    if self.pre_layer_norm:
        # In a pre-LN Transformer, a final layer norm is placed after the last
        # layer, and an intermediate layer norm is kept for each layer's
        # embedding output.
        LayerNorm = TransformerLayerNorm(config.hidden_size,
                                         eps=config.layer_norm_eps)
        self.LayerNorm = nn.ModuleList([
            copy.deepcopy(LayerNorm)
            for _ in range(config.num_hidden_layers)
        ])
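# A minimal sketch (not part of the original code) of what config.share_layer
# changes: with share_layer=True the ModuleList holds the same layer object at
# every index, so every depth updates one set of weights; with share_layer=False
# each index is an independent deep copy. nn.Linear stands in for TransformerLayer.
import copy
import torch.nn as nn

probe = nn.Linear(4, 4)
shared = nn.ModuleList([probe for _ in range(3)])
independent = nn.ModuleList([copy.deepcopy(probe) for _ in range(3)])
assert shared[0] is shared[2]                 # same module, tied parameters
assert independent[0] is not independent[2]   # separate modules, separate parameters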
def __init__(self, config):
    super(TransformerSelfOutput, self).__init__()
    self.pre_layer_norm = config.pre_layer_norm
    self.dense = nn.Linear(config.hidden_size, config.hidden_size)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.LayerNorm = TransformerLayerNorm(config.hidden_size,
                                          eps=config.layer_norm_eps)
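# A minimal sketch (an assumption, not the repo's actual forward) of how the
# modules built above are typically combined in a self-attention output block:
# post-LN applies LayerNorm after the residual addition, while pre-LN leaves the
# residual sum unnormalized because normalization happens at the sub-layer input.
import torch
import torch.nn as nn

def self_output_sketch(dense, dropout, layer_norm, pre_layer_norm,
                       hidden_states, input_tensor):
    hidden_states = dropout(dense(hidden_states))
    if pre_layer_norm:
        return hidden_states + input_tensor            # pre-LN: plain residual sum
    return layer_norm(hidden_states + input_tensor)    # post-LN: norm after residual

# Tiny usage check with stand-in modules (hidden size 8 is arbitrary).
h = torch.randn(2, 5, 8)
out = self_output_sketch(nn.Linear(8, 8), nn.Dropout(0.1), nn.LayerNorm(8), False, h, h)
print(out.shape)  # torch.Size([2, 5, 8])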
def __init__(self, config, input_dim):
    super(TransformerInputRepresentations, self).__init__()
    self.hidden_size = config.hidden_size
    self.spec_transform = nn.Linear(input_dim * config.downsample_rate, config.hidden_size)

    # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
    # any TensorFlow checkpoint file
    self.LayerNorm = TransformerLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
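# A small shape check (illustrative assumption): spec_transform expects spectrogram
# frames that have already been stacked by config.downsample_rate, i.e. the last
# dimension is input_dim * downsample_rate, and projects them to hidden_size.
# The SimpleNamespace config and the 80-dim mel features are stand-ins, not the
# project's real config class or feature setup.
from types import SimpleNamespace
import torch
import torch.nn as nn

cfg = SimpleNamespace(hidden_size=768, downsample_rate=3)
input_dim = 80                                                    # e.g. 80-dim mel features
spec_transform = nn.Linear(input_dim * cfg.downsample_rate, cfg.hidden_size)
frames = torch.randn(2, 100, input_dim * cfg.downsample_rate)     # (batch, time, stacked feat)
print(spec_transform(frames).shape)                               # torch.Size([2, 100, 768])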
def __init__(self, config, output_dim):
    super(TransformerSpecPredictionHead, self).__init__()
    self.output_dim = output_dim
    self.dense = nn.Linear(config.hidden_size, config.hidden_size)
    # config.hidden_act may be a string key into the ACT2FN lookup table or an
    # activation callable; the second branch also accepts unicode strings on Python 2.
    if isinstance(config.hidden_act, str) or (sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)):
        self.transform_act_fn = ACT2FN[config.hidden_act]
    else:
        self.transform_act_fn = config.hidden_act
    self.LayerNorm = TransformerLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
    # Project back to output_dim * downsample_rate so the head can reconstruct
    # the stacked (downsampled) spectrogram frames.
    self.output = nn.Linear(config.hidden_size, self.output_dim * config.downsample_rate)
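# A minimal sketch (an assumption mirroring common prediction-head patterns, not the
# original forward) of how the head's modules are usually chained: dense projection,
# activation, LayerNorm, then a final projection back to output_dim * downsample_rate.
import torch
import torch.nn as nn

def spec_head_sketch(dense, act_fn, layer_norm, output, hidden_states):
    hidden_states = act_fn(dense(hidden_states))
    hidden_states = layer_norm(hidden_states)
    return output(hidden_states)   # (batch, time, output_dim * downsample_rate)

# Stand-in sizes: hidden_size=768, output_dim=80, downsample_rate=3 (all assumed).
h = torch.randn(2, 100, 768)
pred = spec_head_sketch(nn.Linear(768, 768), nn.GELU(), nn.LayerNorm(768),
                        nn.Linear(768, 80 * 3), h)
print(pred.shape)  # torch.Size([2, 100, 240])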