Example #1
    def __init__(self, d_k, d_v, d_model, d_ff, n_heads, dropout=0.1):
        super(DecoderLayer, self).__init__()
        # Decoder self-attention sublayer
        self.dec_self_attn = MultiHeadAttention(d_k, d_v, d_model, n_heads,
                                                dropout)
        # Encoder-decoder (cross) attention sublayer
        self.dec_enc_attn = MultiHeadAttention(d_k, d_v, d_model, n_heads,
                                               dropout)
        # Position-wise feed-forward sublayer
        self.pos_ffn = PoswiseFeedForwardNet(d_model, d_ff, dropout)
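
For reference, a minimal, self-contained usage sketch using PyTorch's built-in nn.TransformerDecoderLayer, which bundles the same three sublayers as the custom DecoderLayer above; the sizes below (d_model=512, 8 heads, d_ff=2048) are illustrative assumptions, not values taken from this example.

import torch
import torch.nn as nn

# Illustrative only: the built-in decoder layer combines self-attention,
# encoder-decoder attention, and a position-wise feed-forward network.
layer = nn.TransformerDecoderLayer(d_model=512, nhead=8,
                                   dim_feedforward=2048, dropout=0.1)

tgt = torch.rand(10, 32, 512)     # (target_len, batch, d_model)
memory = torch.rand(20, 32, 512)  # (source_len, batch, d_model) from the encoder
out = layer(tgt, memory)          # -> shape (10, 32, 512)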
Example #2
    def __init__(self,
                 n_layers,
                 d_k,
                 d_v,
                 d_model,
                 d_ff,
                 n_heads,
                 max_seq_len,
                 tgt_vocab_size,
                 dropout=0.1,
                 weighted=False):
        super(tree_encoder, self).__init__()
        self.d_model = d_model
        self.n_layers = 1  # NOTE: hard-coded; the n_layers constructor argument is ignored here
        #        self.tgt_emb = nn.Embedding(tgt_vocab_size, d_model, padding_idx=data_utils.PAD,)
        #        self.pos_emb = PosEncoding(25, 300) # TODO: *10 fix

        #        self.dropout_emb = nn.Dropout(dropout)
        #        self.layer_type = DecoderLayer if not weighted else WeightedDecoderLayer
        #        self.layers = nn.ModuleList(
        #            [self.layer_type(d_k, d_v, d_model, d_ff, n_heads, dropout) for _ in range(n_layers)])
        #
        #        self.V = nn.ParameterList([nn.Parameter((-.5 - .5) * torch.rand(300, 300) + .5, requires_grad = True) for _ in range(10)]) # 60x60 type er 30 ta
        # Learned 300 -> 300 linear projections (dimensions hard-coded rather than using d_model)
        self.Wm = nn.Linear(300, 300)
        self.Um = nn.Linear(300, 300)
        #        self.w = nn.Parameter((-.5 - .5) * torch.rand(1, 300) + .5, requires_grad = True)
        self.pos_ffn = PoswiseFeedForwardNet(d_model, d_ff, dropout)
        self.demo = nn.Linear(300, 300)
        #        self.head_attn = MultiHeadAttention(d_k, d_v, d_model, n_heads, dropout)
        #        self.layers1 = nn.ModuleList(
        #            [ScaledDotProductAttention(300, dropout) for _ in range(self.n_layers)])
        #        self.layers = nn.ModuleList(
        #            [MultiHeadAttention(d_k, d_v, d_model, n_heads, dropout) for _ in range(self.n_layers)])
        # Active attention stack: multi-branch attention layers (self.n_layers of them, here 1)
        self.layers = nn.ModuleList([
            MultiBranchAttention(d_k, d_v, d_model, d_ff, n_heads, dropout)
            for _ in range(self.n_layers)
        ])
        #        self.layers1 = nn.ModuleList(
        #            [MultiBranchAttention(d_k, d_v, d_model, d_ff, n_heads, dropout) for _ in range(self.n_layers)])
        # Single scaled dot-product attention module (dimension hard-coded to 300)
        self.attention = ScaledDotProductAttention(300, dropout)
        # Position-wise feed-forward projections: d_model -> d_ff -> d_model
        self.w_1 = nn.Linear(d_model, d_ff)
        self.w_2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)
        self.new_objective = False
        # Projects a 900-dim vector (e.g. a concatenation of three 300-dim features) down to 300
        self.proj = nn.Linear(900, 300)
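
For context, a self-contained sketch of the position-wise feed-forward computation that the w_1 / w_2 / dropout members above are typically used for; the tensor sizes, the d_ff value, and the ReLU activation are assumptions, not taken from this example.

import torch
import torch.nn as nn

d_model, d_ff = 300, 1200          # illustrative sizes only
w_1 = nn.Linear(d_model, d_ff)     # expand: d_model -> d_ff
w_2 = nn.Linear(d_ff, d_model)     # project back: d_ff -> d_model
dropout = nn.Dropout(0.1)

x = torch.rand(8, 25, d_model)                  # (batch, seq_len, d_model)
out = w_2(dropout(torch.relu(w_1(x))))          # FFN(x) = W2 * dropout(relu(W1 * x))
print(out.shape)                                # torch.Size([8, 25, 300])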