def __init__(
            self,
            n_src_vocab, len_max_seq, d_word_vec,
            n_layers, n_head, d_k, d_v,
            d_model, d_inner, dropout=0.1, pretrained_embeddings=None):

        super().__init__()

        n_position = len_max_seq + 1

        if pretrained_embeddings is None:
            self.src_word_emb = nn.Embedding(
                n_src_vocab, d_word_vec, padding_idx=Constants.PAD)
        else:
            self.src_word_emb = nn.Embedding.from_pretrained(
                pretrained_embeddings, padding_idx=Constants.PAD, freeze=True)

        self.position_enc = nn.Embedding.from_pretrained(
            get_sinusoid_encoding_table(n_position, d_word_vec, padding_idx=0),
            freeze=True)

        self.segment_enc = nn.Embedding(n_position // 2, d_word_vec, padding_idx=0)

        self.layer_stack = nn.ModuleList([
            EncoderLayer(d_model, d_inner, n_head, d_k, d_v, dropout=dropout)
            for _ in range(n_layers)])
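
The `get_sinusoid_encoding_table` helper used here (and in several later examples) is not shown on this page. Below is a minimal sketch of what such a helper typically computes, assuming the usual sin/cos formulation; the original implementation may differ in its details.

import numpy as np
import torch

def get_sinusoid_encoding_table(n_position, d_hid, padding_idx=None):
    """Build an (n_position, d_hid) sinusoidal position table as a float tensor."""
    # Angle for position `pos` and dimension `i`: pos / 10000^(2 * (i // 2) / d_hid)
    position = np.arange(n_position)[:, None]
    div = np.power(10000, 2 * (np.arange(d_hid) // 2) / d_hid)
    table = position / div
    table[:, 0::2] = np.sin(table[:, 0::2])  # even dimensions use sine
    table[:, 1::2] = np.cos(table[:, 1::2])  # odd dimensions use cosine
    if padding_idx is not None:
        table[padding_idx] = 0.0             # all-zero vector for the padding position
    return torch.FloatTensor(table)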
Example #2
 def __init__(self, bert, hidden_size, num_hidden_layers, num_attention_heads, dropout):
     super().__init__()
     self.N = num_hidden_layers
     self.bert = bert
     self.pe = PositionalEncoder(hidden_size, dropout=dropout)
     self.layers = get_clones(EncoderLayer(hidden_size, num_attention_heads, dropout), num_hidden_layers)
     self.norm = Norm(hidden_size)
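
The `get_clones` helper used in this and several of the following snippets is also not defined on this page. It is usually just a deep-copy loop wrapped in an nn.ModuleList; a minimal sketch under that assumption:

import copy
import torch.nn as nn

def get_clones(module, N):
    """Return N independent deep copies of `module` as an nn.ModuleList."""
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])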
Example #3
    def __init__(self,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 input_vocab_size,
                 maximum_position_encoding,
                 rate=0.1):
        super(ImageCaptioningEncoder, self).__init__()

        self.d_model = d_model
        self.num_layers = num_layers

        # self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)
        # self.pos_encoding = positional_encoding(maximum_position_encoding,
        #                                         self.d_model)

        self.affine = tf.keras.layers.Dense(d_model)

        self.enc_layers = [
            EncoderLayer(d_model, num_heads, dff, rate)
            for _ in range(num_layers)
        ]

        self.dropout = tf.keras.layers.Dropout(rate)
Example #4
 def __init__(self, vocab_size, d_model, N, heads, dropout):
     super().__init__()
     self.N = N
     self.embed = Embedder(vocab_size, d_model)
     self.pe = PositionalEncoder(d_model, dropout=dropout)
     self.layers = get_clones(EncoderLayer(d_model, heads, dropout), N)
     self.norm = Norm(d_model)
    def __init__(self,
                 n_src_vocab,
                 len_max_seq,
                 d_word_vec,
                 n_layers,
                 n_heads,
                 d_k,
                 d_v,
                 d_model,
                 d_inner,
                 dropout=0.1):

        super().__init__()

        n_position = len_max_seq + 1

        self.src_word_emb = nn.Embedding(n_src_vocab,
                                         d_word_vec,
                                         padding_idx=config.pad_id)

        self.position_enc = nn.Embedding.from_pretrained(
            get_sinusoid_encoding_table(n_position, d_word_vec, padding_idx=0),
            freeze=True)

        self.layer_stack = nn.ModuleList([
            EncoderLayer(d_model, d_inner, n_heads, d_k, d_v, dropout=dropout)
            for _ in range(n_layers)
        ])
def make_model(src_vocab,
               tgt_vocab,
               N=6,
               d_model=512,
               d_ff=2048,
               h=8,
               dropout=0.1):
    "从超参数构造模型"
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))

    # Per the original code, it is important to initialize parameters with Glorot / fan_avg.
    # Initialize the parameters with a (Xavier) uniform distribution.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
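
A minimal usage sketch for make_model; the vocabulary sizes and layer count are arbitrary, chosen only for a quick smoke test, and the helper classes referenced above are assumed to be importable.

# Build a tiny two-layer model and report a rough parameter count.
tmp_model = make_model(src_vocab=11, tgt_vocab=11, N=2)
print(sum(p.numel() for p in tmp_model.parameters()))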
Example #7
 def __init__(self, d_model, N_layers, heads, dropout):
     super().__init__()
     self.N_layers = N_layers
     # self.embed = Embedder(vocab_size, d_model)
     # self.pe = PositionalEncoder(d_model, dropout=dropout)
     # self.attn = MultiHeadAttention(heads, d_model, dropout=dropout)
     self.layers = get_clones(EncoderLayer(d_model, heads, dropout),
                              N_layers)
     self.norm = Norm(d_model)
Example #8
    def __init__(self, vocab_size, d_model, N, heads, dropout, device):
        super().__init__()
        self.N = N

        # We need to use the embedder
        # self.embed = Embedder(vocab_size, d_model)
        # self.embed = nn.Linear(vocab_size, d_model)

        self.pe = PositionalEncoder(d_model, dropout=dropout, device=device)
        self.layers = get_clones(EncoderLayer(d_model, heads, dropout), N)
        self.norm = Norm(d_model)
Example #9
 def __init__(self, vocab_size, d_model, N, heads, dropout, field, word_emb,
              opt):
     super().__init__()
     self.N = N
     self.word_emb = word_emb
     self.opt = opt  # unused, just for querying
     self.embed = Embedder(vocab_size, d_model, word_emb, field)
     self.pe = PositionalEncoder(d_model, dropout=dropout)
     self.layers = get_clones(EncoderLayer(d_model, heads, dropout),
                              N)  # attention
     self.norm = Norm(d_model)
Example #10
 def __init__(self, bpe_size, h, d_model, p, d_ff):
     super(Transformer, self).__init__()
     self.bpe_size = bpe_size
     self.word_emb = nn.Embedding(bpe_size, d_model, padding_idx=0)
     self.pos_emb = PositionalEncoding(d_model, p)
     self.encoder = nn.ModuleList([EncoderLayer(h, d_model, p, d_ff) for _ in range(6)]) 
     self.decoder = nn.ModuleList([DecoderLayer(h, d_model, p, d_ff) for _ in range(6)])
     self.generator = nn.Linear(d_model, bpe_size, bias=False)
     # tie weight between word embedding and generator 
     self.generator.weight = self.word_emb.weight
     self.logsoftmax = nn.LogSoftmax(dim=-1)  # explicit dim; the argument-free form is deprecated
     # pre-save a mask to avoid future information in self-attentions in decoder
     # save as a buffer, otherwise will need to recreate it and move to GPU during every call
     mask = torch.ByteTensor(np.triu(np.ones((512,512)), k=1).astype('uint8'))
     self.register_buffer('mask', mask)
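
The pre-saved `mask` buffer above is meant to hide future positions in the decoder's self-attention. Here is a self-contained sketch of how such a buffer is typically sliced and applied; the tensor shapes and the name `scores` are assumptions, not taken from this codebase.

import numpy as np
import torch

# Recreate the pre-saved causal mask and apply a slice of it to dummy attention logits.
mask = torch.from_numpy(np.triu(np.ones((512, 512)), k=1).astype('uint8'))
scores = torch.randn(2, 8, 10, 10)                  # (batch, heads, tgt_len, tgt_len)
causal = mask[:10, :10].bool()                      # slice to the current target length
scores = scores.masked_fill(causal, float('-inf'))  # block attention to future tokens
attn = scores.softmax(dim=-1)                       # future positions receive zero weight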
Example #11
    def __init__(self, embed_size, n_layers, n_head, d_k, d_v,
                 d_model, d_inner, dropout=0.1):

        """Load the pretrained ResNet-152 and replace top fc layer."""
        super(Encoder, self).__init__()
        resnet = models.resnet152(pretrained=True)
        modules = list(resnet.children())[:-1]      # delete the last fc layer.
        self.resnet = nn.Sequential(*modules)
        self.linear = nn.Linear(resnet.fc.in_features, embed_size)
        self.bn = nn.BatchNorm1d(embed_size, momentum=0.01)

        self.dropout = nn.Dropout(p=dropout)
        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
        self.layer_stack = nn.ModuleList([
            EncoderLayer(d_model, d_inner, n_head, d_k, d_v, dropout=dropout)
            for _ in range(n_layers)])
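
As an aside, `pretrained=True` has been deprecated in recent torchvision releases in favor of an explicit weights enum. Assuming torchvision >= 0.13, the equivalent call would be:

from torchvision import models

# Explicit weights enum replaces the boolean pretrained flag in newer torchvision.
resnet = models.resnet152(weights=models.ResNet152_Weights.IMAGENET1K_V1)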
Example #12
    def __init__(self,
                 word_emb,
                 rela_emb,
                 max_len,
                 n_layers=6,
                 n_head=8,
                 d_k=64,
                 d_v=64,
                 d_word_vec=512,
                 d_model=512,
                 d_inner_hid=1024,
                 dropout=0.1):
        super(Encoder, self).__init__()

        n_position = max_len + 1
        self.max_len = max_len
        self.d_model = d_model

        self.position_enc = nn.Embedding(n_position,
                                         d_word_vec,
                                         padding_idx=Constants.PAD)
        self.position_enc.weight.data = position_encoding_init(
            n_position, d_word_vec)

        #Word Embedding layer
        self.word_embedding = nn.Embedding(word_emb.shape[0],
                                           word_emb.shape[1])
        self.word_embedding.weight = nn.Parameter(
            torch.from_numpy(word_emb).float())
        self.word_embedding.weight.requires_grad = False  # fix the embedding matrix

        #Rela Embedding Layer
        self.rela_embedding = nn.Embedding(rela_emb.shape[0],
                                           rela_emb.shape[1])
        self.rela_embedding.weight = nn.Parameter(
            torch.from_numpy(rela_emb).float())
        self.rela_embedding.weight.requires_grad = False  # fix the embedding matrix

        self.layer_stack = nn.ModuleList([
            EncoderLayer(d_model,
                         d_inner_hid,
                         n_head,
                         d_k,
                         d_v,
                         dropout=dropout) for _ in range(n_layers)
        ])
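
The manual nn.Parameter assignment plus `requires_grad = False` above can also be expressed with nn.Embedding.from_pretrained, as in the first example on this page. A minimal equivalent sketch using dummy weights:

import numpy as np
import torch
import torch.nn as nn

# Dummy 5x8 embedding matrix; freeze=True keeps it fixed during training.
word_emb = np.random.rand(5, 8).astype('float32')
word_embedding = nn.Embedding.from_pretrained(torch.from_numpy(word_emb), freeze=True)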
Example #13
    def __init__(
            self,
            len_seq, d_word_vec,
            n_layers, n_head, d_k, d_v,
            d_inner, dropout=0.1):

        super(Encoder, self).__init__()

        n_position = len_seq  # the +1 (for SOS) is omitted; not required for continuous inputs
        self.position_enc = nn.Embedding.from_pretrained(
            # padding_idx=0 is reserved for SOS; d_word_vec was replaced by d_k * n_head (the true feature count)
            get_sinusoid_encoding_table(n_position, d_k * n_head, padding_idx=0),
            # freeze=True keeps the table fixed, so it stays identical across encoder and decoder
            freeze=True)

        self.layer_stack = nn.ModuleList([
            EncoderLayer(d_inner, n_head, d_k, d_v, dropout=dropout)
            for _ in range(n_layers)])
Example #14
 def __init__(self,
              d_model=512,
              n_head=8,
              n_layers=6,
              d_inner=2048,
              n_positions=200,
              dropout=0.1):
     super().__init__()
     self.d_k = self.d_v = self.d_q = d_model // n_head
     self.position_enc = PositionalEncoding(d_model, n_position=n_positions)
     # self.dropout = nn.Dropout(p=dropout)
     self.layer_stack = nn.ModuleList([
         EncoderLayer(d_model,
                      d_inner,
                      n_head,
                      self.d_k,
                      self.d_v,
                      dropout=dropout) for _ in range(n_layers)
     ])
     self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
Example #15
    def __init__(self,
                 n_src_vocab,
                 n_max_seq,
                 n_layers=3,
                 n_head=3,
                 d_k=64,
                 d_v=64,
                 d_word_vec=128,
                 d_model=128,
                 d_inner_hid=128,
                 dropout=0.8):

        super(Encoder, self).__init__()

        n_position = n_max_seq + 1
        self.n_max_seq = n_max_seq
        self.d_model = d_model

        self.position_enc = nn.Embedding(n_position,
                                         d_word_vec,
                                         padding_idx=Constants_PAD)
        self.position_enc.weight.data = position_encoding_init(
            n_position, d_word_vec)

        self.src_word_emb = nn.Embedding(n_src_vocab,
                                         d_word_vec,
                                         padding_idx=Constants_PAD)

        self.layer_stack = nn.ModuleList([
            EncoderLayer(d_model,
                         d_inner_hid,
                         n_head,
                         d_k,
                         d_v,
                         dropout=dropout) for _ in range(n_layers)
        ])

        self.output = nn.Linear(800 * 128, 19)
Example #16
def make_model(src_vocab,
               tgt_vocab,
               N=6,
               d_model=512,
               d_ff=2048,
               h=8,
               dropout=.1):
    """ construct model from hyper-parameters"""
    c = copy.deepcopy
    attn_rpr = MultiHeadedAttention_RPR(d_model, h, max_relative_position=5)
    attn = MultiHeadedAttention(d_model, h)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn_rpr), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn_rpr), c(attn), c(ff), dropout),
                N), nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))

    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
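
The initialization loop here uses the same Glorot/Xavier scheme as the earlier make_model: only parameters with more than one dimension are re-initialized, so one-dimensional biases keep their defaults. A small standalone illustration:

import torch.nn as nn

layer = nn.Linear(512, 512)
nn.init.xavier_uniform_(layer.weight)  # 2-D weight tensor gets Xavier uniform init
# layer.bias is 1-D, so the `p.dim() > 1` check in the loop would skip it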
 def __init__(self, d_input, d_model, N, heads, dropout):
     super().__init__()
     self.N = N
     self.layers = get_clones(
         EncoderLayer(d_input, d_model, heads, dropout), N)
     self.norm = nn.LayerNorm(d_model)