def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
    super(EncoderLayer, self).__init__()
    self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)
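# A minimal sketch of the matching forward pass, assuming the common encoder-layer
# pattern in which the attention and feed-forward sub-modules handle their own
# residual connections and layer normalization internally. The argument names
# (enc_input, slf_attn_mask) are illustrative and not taken from the source.
def forward(self, enc_input, slf_attn_mask=None):
    # Self-attention: queries, keys and values all come from the encoder input.
    enc_output, enc_slf_attn = self.slf_attn(
        enc_input, enc_input, enc_input, mask=slf_attn_mask)
    # Position-wise feed-forward applied to each position independently.
    enc_output = self.pos_ffn(enc_output)
    return enc_output, enc_slf_attn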
def __init__(self, headNum, vectorDim, entityDim, hiddenDim, dropout=0.1,
             useAtt=False, use_cuda=True):
    super(TransformerLayer, self).__init__()
    self.useAtt = useAtt
    # Multi-head self-attention and position-wise feed-forward sub-layers;
    # useAtt is stored and forwarded to the attention module, while use_cuda
    # is forwarded to both sub-modules.
    self.selfAttention = MultiHeadAttention(headNum, vectorDim, entityDim, hiddenDim,
                                            dropout=dropout, useAtt=useAtt, use_cuda=use_cuda)
    self.posFeedForward = PositionwiseFeedForward(hiddenDim, dropout=dropout, use_cuda=use_cuda)
def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1,
             rel_pos=False, rel_pos_clip=None, ex_mask=None):
    super(EncoderLayer, self).__init__()
    # Same two sub-layers as the basic encoder layer; the relative-position
    # options (rel_pos, rel_pos_clip) and the extra mask (ex_mask) are passed
    # through to the self-attention module.
    self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout,
                                       rel_pos=rel_pos, rel_pos_clip=rel_pos_clip,
                                       ex_mask=ex_mask)
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)
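# Illustrative construction of the relative-position variant above; every value
# here (model width 512, inner size 2048, 8 heads, clip distance 16) is an
# assumption chosen for the example, not taken from the source.
layer = EncoderLayer(d_model=512, d_inner=2048, n_head=8, d_k=64, d_v=64,
                     dropout=0.1, rel_pos=True, rel_pos_clip=16)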