import copy

import torch
import torch.nn as nn


class DecoderLayer(nn.Module):
    def __init__(self, d_model, heads, dropout=0.1):
        super(DecoderLayer, self).__init__()
        self.norm_1 = Norm(d_model)
        self.norm_2 = Norm(d_model)
        self.norm_3 = Norm(d_model)
        # Self-attention and encoder-decoder attention
        self.attn_1 = MultiHeadAttention(heads, d_model)
        self.attn_2 = MultiHeadAttention(heads, d_model)
        # Keep the module device-agnostic; move the whole model with .to(device)
        self.ff = FeedForward(d_model)
        # Residual dropout
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)
        self.dropout_3 = nn.Dropout(dropout)
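    # Sketch of the matching forward pass, assuming the pre-norm residual
    # convention implied by the norm/dropout pairs above; the mask names
    # (src_mask, trg_mask) and the MultiHeadAttention(q, k, v, mask)
    # call signature are assumptions, not confirmed by this snippet.
    def forward(self, x, e_outputs, src_mask, trg_mask):
        # Masked self-attention over the decoder input
        x2 = self.norm_1(x)
        x = x + self.dropout_1(self.attn_1(x2, x2, x2, trg_mask))
        # Encoder-decoder attention over the encoder outputs
        x2 = self.norm_2(x)
        x = x + self.dropout_2(self.attn_2(x2, e_outputs, e_outputs, src_mask))
        # Position-wise feed-forward
        x2 = self.norm_3(x)
        x = x + self.dropout_3(self.ff(x2))
        return x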
class TransformerEncoder(nn.Module):
    def __init__(self, d_model, N, heads, max_seq_len):
        super(TransformerEncoder, self).__init__()
        self.N = N
        self.position = PositionalEncoder(d_model, max_seq_len)
        # Generate N Encoder layers
        self.layers = get_clones(EncoderLayer(d_model, heads), N)
        self.norm = Norm(d_model)
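    # Sketch of the matching forward pass; this encoder takes no vocab_size,
    # so src is assumed to arrive already embedded, and src_mask plus the
    # per-layer call signature are assumptions.
    def forward(self, src, src_mask):
        x = self.position(src)            # add positional encodings
        for i in range(self.N):
            x = self.layers[i](x, src_mask)
        return self.norm(x)               # final layer norm


# get_clones is used above but not defined in this snippet; a minimal
# sketch, assuming the common deep-copy-into-ModuleList idiom:
def get_clones(module, N):
    # N independent copies of one layer, each with its own parameters
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])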
def __init__(self, d_model, heads, dropout=0.1): """ :param d_model: :param heads: :param dropout: """ super(EncoderLayer, self).__init__() self.norm_1 = Norm(d_model) self.norm_2 = Norm(d_model) # Two main sub-layers self.attn = MultiHeadAttention(heads, d_model) self.ff = FeedForward(d_model) # Residual dropout self.dropout_1 = nn.Dropout(dropout) self.dropout_2 = nn.Dropout(dropout)
class TransformerDecoder(nn.Module):
    def __init__(self, vocab_size, d_model, N, heads, max_seq_len):
        super(TransformerDecoder, self).__init__()
        self.N = N
        self.embed = Embedder(vocab_size, d_model)
        self.position = PositionalEncoder(d_model, max_seq_len)
        # Generate N Decoder layers
        self.layers = get_clones(DecoderLayer(d_model, heads), N)
        self.norm = Norm(d_model)
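    # Sketch of the matching forward pass; trg holds target token ids
    # (hence the Embedder here), e_outputs are the encoder states, and
    # both mask names are assumptions.
    def forward(self, trg, e_outputs, src_mask, trg_mask):
        x = self.position(self.embed(trg))   # embed tokens, add positions
        for i in range(self.N):
            x = self.layers[i](x, e_outputs, src_mask, trg_mask)
        return self.norm(x)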
def __init__(self, d_model, heads, dropout=0.1): """ Input: d_model is length of vector of embeddings heads is number of heads for MultiHeadAttetion """ super(EncoderLayer, self).__init__() self.norm_1 = Norm(d_model) self.norm_2 = Norm(d_model) # Two main sub-layers self.attn = MultiHeadAttention(heads, d_model) self.ff = FeedForward(d_model) # Residual dropout self.dropout_1 = nn.Dropout(dropout) self.dropout_2 = nn.Dropout(dropout)
class LSTMDecoder(nn.Module):
    def __init__(self, vocab_size, input_dim, hidden_dim, num_layer,
                 bidirectional, dropout=0.2):
        super(LSTMDecoder, self).__init__()
        self.vocab_size = vocab_size
        self.hidden_dim = hidden_dim
        self.embed = Embedder(vocab_size, input_dim)
        self.norm = Norm(input_dim)
        # Define LSTM cell (single-step; num_layer, bidirectional, and
        # dropout are accepted here but not used by nn.LSTMCell)
        self.lstm_cell = nn.LSTMCell(input_dim, hidden_dim)
        self.linear_out = nn.Linear(hidden_dim, vocab_size)
        self.attn = GlobalAttentionCell(hidden_dim)
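    # Sketch of one decoding step, assuming a Luong-style global attention
    # whose GlobalAttentionCell(hidden, encoder_outputs) returns an attended
    # hidden state; that interface, the Norm on the embedding, and the
    # method name `step` are all assumptions, not confirmed by this snippet.
    def step(self, token, hidden, cell, encoder_outputs):
        emb = self.norm(self.embed(token))             # (batch, input_dim)
        hidden, cell = self.lstm_cell(emb, (hidden, cell))
        attended = self.attn(hidden, encoder_outputs)  # (batch, hidden_dim)
        logits = self.linear_out(attended)             # (batch, vocab_size)
        return logits, hidden, cell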