示例#1
0
 def __init__(self,
              n_heads,
              d_model,
              d_head=None,
              p=0.,
              bias=True,
              scale=True):
     """Set up a multi-head attention module: q/k/v/out projections,
     attention and residual dropouts, and a LayerNorm over `d_model`.

     `d_head` defaults to `d_model // n_heads` when not given; `scale`
     is stored for use by the attention computation elsewhere.
     """
     super().__init__()
     d_head = ifnone(d_head, d_model // n_heads)
     self.n_heads = n_heads
     self.d_head = d_head
     self.scale = scale
     # One independent linear projection each for queries, keys and values,
     # all mapping d_model -> n_heads * d_head.
     inner = n_heads * d_head
     self.q_wgt = nn.Linear(d_model, inner, bias=bias)
     self.k_wgt = nn.Linear(d_model, inner, bias=bias)
     self.v_wgt = nn.Linear(d_model, inner, bias=bias)
     self.out = nn.Linear(inner, d_model, bias=bias)
     self.drop_att = nn.Dropout(p)
     self.drop_res = nn.Dropout(p)
     self.ln = nn.LayerNorm(d_model)
示例#2
0
    def __init__(self,
                 emb_enc,
                 emb_dec,
                 nh,
                 out_sl,
                 nl=2,
                 bos_idx=0,
                 pad_idx=1):
        """Set up an attentional seq2seq module: bidirectional GRU encoder,
        GRU decoder fed with an attention context, and an output projection
        sharing storage with the decoder embedding.
        """
        super().__init__()
        self.nl = nl
        self.nh = nh
        self.out_sl = out_sl
        self.pr_force = 1  # teacher-forcing probability, starts at 1
        self.bos_idx = bos_idx
        self.pad_idx = pad_idx
        self.emb_enc = emb_enc
        self.emb_dec = emb_dec
        self.emb_sz_enc = emb_enc.embedding_dim
        self.emb_sz_dec = emb_dec.embedding_dim
        self.voc_sz_dec = emb_dec.num_embeddings

        # Encoder: embedding dropout, then a bidirectional GRU; its 2*nh-wide
        # output is projected down to the decoder embedding size.
        self.emb_enc_drop = nn.Dropout(0.15)
        self.gru_enc = nn.GRU(self.emb_sz_enc,
                              nh,
                              num_layers=nl,
                              dropout=0.25,
                              batch_first=True,
                              bidirectional=True)
        self.out_enc = nn.Linear(2 * nh, self.emb_sz_dec, bias=False)

        # Decoder: GRU whose input is the decoder embedding concatenated with
        # the 2*nh attention context.
        self.gru_dec = nn.GRU(self.emb_sz_dec + 2 * nh,
                              self.emb_sz_dec,
                              num_layers=nl,
                              dropout=0.1,
                              batch_first=True)
        self.out_drop = nn.Dropout(0.35)
        self.out = nn.Linear(self.emb_sz_dec, self.voc_sz_dec)
        # Weight tying: the output projection shares its weight storage with
        # the decoder embedding matrix.
        self.out.weight.data = self.emb_dec.weight.data

        # Additive attention parameters: separate projections for encoder
        # outputs and decoder hidden state, plus a learned score vector.
        self.enc_att = nn.Linear(2 * nh, self.emb_sz_dec, bias=False)
        self.hid_att = nn.Linear(self.emb_sz_dec, self.emb_sz_dec)
        self.V = self.init_param(self.emb_sz_dec)
示例#3
0
    def __init__(self,
                 emb_enc,
                 emb_dec,
                 hidden_layer_size,
                 max_output_length,
                 nl=1,
                 bos_idx=0,
                 pad_idx=1):
        """Set up a plain (no-attention) seq2seq module: a unidirectional GRU
        encoder whose final state, projected by `out_enc`, seeds a GRU decoder.

        Args:
            emb_enc: encoder embedding (read for `embedding_dim`).
            emb_dec: decoder embedding (read for `embedding_dim` and
                `num_embeddings`, the output vocabulary size).
            hidden_layer_size: encoder GRU hidden size.
            max_output_length: maximum decoded sequence length.
            nl: number of GRU layers for encoder and decoder.
            bos_idx: beginning-of-sequence token index.
            pad_idx: padding token index.
        """
        super().__init__()
        self.nl, self.hidden_layer_size, self.max_output_length = nl, hidden_layer_size, max_output_length
        self.bos_idx, self.pad_idx = bos_idx, pad_idx
        self.em_sz_enc = emb_enc.embedding_dim
        self.em_sz_dec = emb_dec.embedding_dim
        self.voc_sz_dec = emb_dec.num_embeddings

        self.emb_enc = emb_enc
        self.emb_enc_drop = nn.Dropout(0.15)
        # No inter-layer `dropout` kwarg here: with the default nl=1 nn.GRU
        # would warn that dropout has no effect, so it is omitted.
        self.gru_enc = nn.GRU(self.em_sz_enc,
                              hidden_layer_size,
                              num_layers=nl,
                              batch_first=True)
        # Projects the encoder hidden state into the decoder's
        # embedding-sized hidden space.
        self.out_enc = nn.Linear(hidden_layer_size, self.em_sz_dec, bias=False)

        self.emb_dec = emb_dec
        self.gru_dec = nn.GRU(self.em_sz_dec,
                              self.em_sz_dec,
                              num_layers=nl,
                              batch_first=True)
        self.out_drop = nn.Dropout(0.35)
        self.out = nn.Linear(self.em_sz_dec, self.voc_sz_dec)
        # Weight tying: the output projection shares its weight storage with
        # the decoder embedding matrix.
        self.out.weight.data = self.emb_dec.weight.data
        # Teacher-forcing probability; 0. means ground truth is never fed.
        self.pr_force = 0.
示例#4
0
 def __init__(self, emb, inp_p=0.):
     """Input stage wiring: stores the given embedding, a positional
     encoding of matching width, and an input dropout of probability
     `inp_p`. This module is hard-wired to a 300-dimensional embedding.
     """
     super().__init__()
     d_model = 300  # single source of truth for the embedding width
     self.emb_sz = d_model
     self.embed = emb
     self.pos_enc = PositionalEncoding(d_model)
     self.drop = nn.Dropout(inp_p)
示例#5
0
 def feed_forward(d_model, d_ff, ff_p=0., double_drop=True):
     """Build a transformer position-wise feed-forward stack.

     Expands `d_model -> d_ff` with a ReLU, optionally drops out after the
     activation (`double_drop`), projects back to `d_model`, applies a
     second dropout, merges via `MergeLayer`, then layer-normalizes.
     """
     stack = [nn.Linear(d_model, d_ff), nn.ReLU()]
     if double_drop:
         stack.append(nn.Dropout(ff_p))
     stack += [nn.Linear(d_ff, d_model), nn.Dropout(ff_p)]
     return SequentialEx(*stack, MergeLayer(), nn.LayerNorm(d_model))