Example No. 1
 def __init__(self, hidden_size=512, num_heads=8, dropout=.1):
     super(Sublayer3, self).__init__()
     self.lnorm = LayerNorm(hidden_size)
     self.sublayer = MultiheadAttention(
         attention_size=hidden_size,
         num_heads=num_heads,
         dropout=dropout)
Example No. 2
    def __init__(self,
                 hidden_size=512,
                 num_heads=8,
                 dropout=0.1,
                 prenorm=True,
                 scalenorm=True):
        super(Sublayer3, self).__init__()
        self.sublayer = MultiheadAttention(
            attention_size=hidden_size,
            num_heads=num_heads,
            dropout=dropout,
            nystrom=False,  # Nystrom used only for self-attention
            kernel_size=None,  # convolutional residual is not used when a subsequent mask is applied
        )
        self.prenorm = prenorm
        self.lnorm = (LayerNorm(hidden_size)
                      if not scalenorm else ScaleNorm(hidden_size))

        if self.prenorm:
            self.lnormy = (LayerNorm(hidden_size)
                           if not scalenorm else ScaleNorm(hidden_size))
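The prenorm flag in the examples above toggles where normalization sits relative to the residual connection. Below is a minimal, self-contained sketch of that pre-norm vs. post-norm pattern using only torch.nn built-ins (nn.MultiheadAttention and nn.LayerNorm stand in for the project's MultiheadAttention and LayerNorm/ScaleNorm; the class name ResidualSelfAttention is illustrative, not from the source):

import torch
import torch.nn as nn

class ResidualSelfAttention(nn.Module):
    """Illustrative residual wrapper; not the project's Sublayer class."""

    def __init__(self, hidden_size=512, num_heads=8, dropout=0.1, prenorm=True):
        super().__init__()
        self.attn = nn.MultiheadAttention(hidden_size, num_heads,
                                          dropout=dropout, batch_first=True)
        self.lnorm = nn.LayerNorm(hidden_size)
        self.drop = nn.Dropout(dropout)
        self.prenorm = prenorm

    def forward(self, x):
        if self.prenorm:
            # Pre-norm: normalize the input, apply the sublayer, add the residual.
            y = self.lnorm(x)
            out, _ = self.attn(y, y, y)
            return x + self.drop(out)
        # Post-norm: apply the sublayer, add the residual, then normalize.
        out, _ = self.attn(x, x, x)
        return self.lnorm(x + self.drop(out))

x = torch.randn(2, 10, 512)                    # (batch, seq_len, hidden)
print(ResidualSelfAttention()(x).shape)        # torch.Size([2, 10, 512])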
Example No. 3
 def __init__(
     self,
     hidden_size=512,
     inner_size=2048,
     dropout=0.1,
     prenorm=True,
     scalenorm=True,
 ):
     super(Sublayer2, self).__init__()
     self.sublayer = PositionwiseFF(hidden_size,
                                    inner_size,
                                    dropout=dropout)
     self.prenorm = prenorm
     self.lnorm = (LayerNorm(hidden_size)
                   if not scalenorm else ScaleNorm(hidden_size))
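ScaleNorm is used throughout these snippets but not defined here. For reference, a minimal sketch of the usual formulation (a single learned scale applied to the L2-normalized vector, as in Nguyen & Salazar, "Transformers without Tears", 2019) follows; this is an assumption about what the project's ScaleNorm does, not its actual code:

import torch
import torch.nn as nn

class ScaleNorm(nn.Module):
    """Assumed ScaleNorm: g * x / ||x||_2 along the last dimension."""

    def __init__(self, hidden_size, eps=1e-5):
        super().__init__()
        # A single learnable scalar, commonly initialized to sqrt(hidden_size).
        self.scale = nn.Parameter(torch.tensor(float(hidden_size) ** 0.5))
        self.eps = eps

    def forward(self, x):
        norm = x.norm(dim=-1, keepdim=True).clamp(min=self.eps)
        return self.scale * x / norm

print(ScaleNorm(512)(torch.randn(2, 10, 512)).shape)  # torch.Size([2, 10, 512])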
Example No. 4
 def __init__(self,
              n_in,
              n_out,
              activation='relu',
              layer_norm=True,
              bias=True,
              dropout=.1):
     super(FF, self).__init__()
     self.fc = nn.Linear(n_in, n_out, bias=bias)
     self.activation = NON_LINEARITIES.get(activation, nn.ReLU)
     if self.activation is not None:
         self.activation = self.activation()
     self.layer_norm = None
     if layer_norm:
         self.layer_norm = LayerNorm(n_out)
     self.drop = nn.Dropout(dropout)
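NON_LINEARITIES is not shown in this snippet; it is presumably a mapping from activation names to nn.Module classes. A hypothetical definition consistent with the get(..., nn.ReLU) fallback and the `is not None` check might look like:

import torch.nn as nn

# Hypothetical mapping; the project's actual NON_LINEARITIES may differ.
NON_LINEARITIES = {
    "relu": nn.ReLU,
    "gelu": nn.GELU,
    "tanh": nn.Tanh,
    "none": None,  # a None entry is why FF checks `if self.activation is not None`
}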
Example No. 5
 def __init__(
     self,
     hidden_size=512,
     num_heads=8,
     dropout=0.1,
     nystrom=False,
     num_landmarks=32,
     kernel_size=None,
     prenorm=True,
     scalenorm=True,
 ):
     super(Sublayer1, self).__init__()
     self.sublayer = MultiheadAttention(
         attention_size=hidden_size,
         num_heads=num_heads,
         dropout=dropout,
         nystrom=nystrom,
         kernel_size=kernel_size,
         num_landmarks=num_landmarks,
     )
     self.prenorm = prenorm
     self.lnorm = (LayerNorm(hidden_size)
                   if not scalenorm else ScaleNorm(hidden_size))
Example No. 6
 def __init__(self, hidden_size=512, inner_size=2048, dropout=.1):
     super(Sublayer2, self).__init__()
     self.lnorm = LayerNorm(hidden_size)
     self.sublayer = PositionwiseFF(hidden_size,
                                    inner_size,
                                    dropout=dropout)