def make_model(src_vocab,
               tgt_vocab,
               N=6,
               d_model=512,
               d_ff=2048,
               h=8,
               dropout=0.1):
    "从超参数构造模型"
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))

    # From their code: initializing parameters with Glorot / fan_avg is important.
    # Initialize the parameters with a (Xavier) uniform distribution.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
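A minimal usage sketch for the make_model helper above; it assumes the usual Annotated-Transformer companion classes (EncoderDecoder, MultiHeadedAttention, PositionwiseFeedForward, etc.) are defined in the same module, and the vocabulary sizes and N=2 below are arbitrary toy values.

# Toy instantiation (hypothetical vocabulary sizes); N=2 keeps the smoke test small.
tmp_model = make_model(src_vocab=11, tgt_vocab=11, N=2)
print(sum(p.numel() for p in tmp_model.parameters()))  # rough parameter count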
Example No. 2
 def __init__(self, vocab_size, d_model, N, heads, dropout):
     super().__init__()
     self.N = N
     self.embed = Embedder(vocab_size, d_model)
     self.pe = PositionalEncoder(d_model, dropout=dropout)
     self.layers = get_clones(DecoderLayer(d_model, heads, dropout), N)
     self.norm = Norm(d_model)
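A hedged sketch of the forward pass that commonly accompanies this constructor (not part of the snippet above); it assumes each DecoderLayer is called as layer(x, e_outputs, src_mask, trg_mask) and that Embedder and PositionalEncoder behave as used in __init__.

 def forward(self, trg, e_outputs, src_mask, trg_mask):
     # Embed the target tokens, add positional information, run the N cloned
     # decoder layers against the encoder outputs, then apply the final norm.
     x = self.embed(trg)
     x = self.pe(x)
     for i in range(self.N):
         x = self.layers[i](x, e_outputs, src_mask, trg_mask)
     return self.norm(x)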
Example No. 3
 def __init__(self, bert, hidden_size, num_hidden_layers, num_attention_heads, dropout):
     super().__init__()
     self.N = num_hidden_layers
     self.bert = bert
     self.pe = PositionalEncoder(hidden_size, dropout=dropout)
     self.layers = get_clones(DecoderLayer(hidden_size, num_attention_heads, dropout), num_hidden_layers)
     self.norm = Norm(hidden_size)
Example No. 4
def make_model_newast(src_vocab, tgt_vocab, ast_vocab, kg_embed, N=6, d_model=512, d_ff=2048, d_intermediate=512, h=8, dropout=0.1):
    "从超参数构造模型"
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    attn_ent = MultiHeadedAttention(h, d_model)
    attn_ast = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    with open(kg_embed, "r", encoding='utf-8') as f:
        lines = json.loads(f.read())
        vecs = list()
        # vecs.append([0] * 100)  # CLS
        for (i, line) in enumerate(lines):
            if line == "ent_embeddings":
                for vec in lines[line]:
                    vec = [float(x) for x in vec]
                    vecs.append(vec)
    embed = torch.FloatTensor(vecs)

    model = EncoderDecoder4newAST(
        Encoder4KG(EncoderLayer4KG(d_model, d_intermediate, c(attn), c(attn_ent), c(ff), dropout), N),
        Encoder4newAST(EncoderLayer4newAST(d_model, c(attn_ast), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        torch.nn.Embedding.from_pretrained(embed),
        nn.Sequential(Embeddings(d_model, 5000), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))

    # From their code: initializing parameters with Glorot / fan_avg is important.
    # Initialize the parameters with a (Xavier) uniform distribution.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
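For reference, a toy kg_embed file that the loader above would accept; the values below are made up, and only the "ent_embeddings" key (a list of float vectors, in practice embedding-sized) is actually consumed.

import json

# Hypothetical toy knowledge-graph embedding file; make_model_newast only reads "ent_embeddings".
toy_kg = {"ent_embeddings": [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]}
with open("kg_embed.json", "w", encoding="utf-8") as f:
    json.dump(toy_kg, f)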
Example No. 5
 def __init__(self, n_trg_vocab, d_word_vec, n_layers, n_head, d_k, d_v,
         d_model, d_inner, pad_idx, n_position=200, dropout=0.1):
     """Set the hyper-parameters and build the layers."""
     super(Decoder, self).__init__()
     self.trg_word_emb = nn.Embedding(n_trg_vocab, d_word_vec, padding_idx=pad_idx)
     self.position_enc = PositionalEncoding(d_word_vec, n_position=n_position)
     self.dropout = nn.Dropout(p=dropout)
     self.layer_stack = nn.ModuleList([
         DecoderLayer(d_model, d_inner, n_head, d_k, d_v, dropout=dropout)
         for _ in range(n_layers)])
     self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
Example No. 6
 def __init__(self, vocab_size, d_model, N, heads, dropout,
              decoder_extra_layers, field, word_emb, opt):
     super().__init__()
     self.N = N
     self.opt = opt
     self.word_emb = word_emb  # unused, just for querying
     self.embed = Embedder(vocab_size, d_model, word_emb, field)
     self.pe = PositionalEncoder(d_model, dropout=dropout)
     self.layers = get_clones(
         DecoderLayer(d_model, heads, decoder_extra_layers, dropout), N,
         decoder_extra_layers)
     self.norm = Norm(d_model)
Example No. 7
    def __init__(
            self,
            n_tgt_vocab, len_max_seq, d_word_vec,
            n_layers, n_head, d_k, d_v,
            d_model, d_inner, dropout=0.1, pretrained_embeddings=None):

        super().__init__()
        n_position = len_max_seq + 1

        self.layer_stack = nn.ModuleList([
            DecoderLayer(d_model, d_inner, n_head, d_k, d_v, dropout=dropout)
            for _ in range(n_layers)])
Example No. 8
 def __init__(self, bpe_size, h, d_model, p, d_ff):
     super(Transformer, self).__init__()
     self.bpe_size = bpe_size
     self.word_emb = nn.Embedding(bpe_size, d_model, padding_idx=0)
     self.pos_emb = PositionalEncoding(d_model, p)
     self.encoder = nn.ModuleList([EncoderLayer(h, d_model, p, d_ff) for _ in range(6)]) 
     self.decoder = nn.ModuleList([DecoderLayer(h, d_model, p, d_ff) for _ in range(6)])
     self.generator = nn.Linear(d_model, bpe_size, bias=False)
     # tie weight between word embedding and generator 
     self.generator.weight = self.word_emb.weight
     self.logsoftmax = nn.LogSoftmax(dim=-1)  # log-softmax over the vocabulary dimension
     # pre-save a mask to avoid future information in self-attentions in decoder
     # save as a buffer, otherwise will need to recreate it and move to GPU during every call
     mask = torch.ByteTensor(np.triu(np.ones((512,512)), k=1).astype('uint8'))
     self.register_buffer('mask', mask)
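A standalone illustration of how the registered mask buffer is typically consumed at decode time (the slicing below is an assumption, not taken from this snippet): rebuild the same 512x512 upper-triangular mask and cut it down to a hypothetical target length T, so nonzero entries mark the future positions that decoder self-attention must not see.

import numpy as np
import torch

mask = torch.ByteTensor(np.triu(np.ones((512, 512)), k=1).astype('uint8'))
T = 7  # hypothetical target sequence length
subsequent_mask = mask[:T, :T]  # 1s above the diagonal flag positions to be masked out
print(subsequent_mask)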
Example No. 9
    def __init__(self,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 target_vocab_size,
                 maximum_position_encoding,
                 rate=0.1):
        super(ImageCaptioningDecoder, self).__init__()

        self.d_model = d_model
        self.num_layers = num_layers

        self.embedding = tf.keras.layers.Embedding(target_vocab_size, d_model)
        self.pos_encoding = positional_encoding(maximum_position_encoding,
                                                d_model)

        self.dec_layers = [
            DecoderLayer(d_model, num_heads, dff, rate)
            for _ in range(num_layers)
        ]
        self.dropout = tf.keras.layers.Dropout(rate)
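A toy instantiation of the decoder above; the hyperparameter values are arbitrary, and it assumes the DecoderLayer class and positional_encoding helper from this snippet's repository are in scope.

# Hypothetical hyperparameters for a small captioning decoder.
decoder = ImageCaptioningDecoder(num_layers=2,
                                 d_model=128,
                                 num_heads=4,
                                 dff=512,
                                 target_vocab_size=8000,
                                 maximum_position_encoding=1000,
                                 rate=0.1)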
Example No. 10
def make_model(src_vocab,
               tgt_vocab,
               N=6,
               d_model=512,
               d_ff=2048,
               h=8,
               dropout=.1):
    """ construct model from hyper-parameters"""
    c = copy.deepcopy
    attn_rpr = MultiHeadedAttention_RPR(d_model, h, max_relative_position=5)
    attn = MultiHeadedAttention(d_model, h)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn_rpr), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn_rpr), c(attn), c(ff), dropout),
                N), nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))

    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
Example No. 11
    def __init__(self,
                 n_tgt_vocab,
                 n_max_seq,
                 n_layers=6,
                 n_head=8,
                 d_k=64,
                 d_v=64,
                 d_word_vec=512,
                 d_model=512,
                 d_inner_hid=1024,
                 dropout=0.1):

        super(Decoder, self).__init__()
        n_position = n_max_seq + 1
        self.n_max_seq = n_max_seq
        self.d_model = d_model

        self.position_enc = nn.Embedding(n_position,
                                         d_word_vec,
                                         padding_idx=Constants.PAD)
        self.position_enc.weight.data = position_encoding_init(
            n_position, d_word_vec)

        self.tgt_word_emb = nn.Embedding(n_tgt_vocab,
                                         d_word_vec,
                                         padding_idx=Constants.PAD)
        self.dropout = nn.Dropout(dropout)

        self.layer_stack = nn.ModuleList([
            DecoderLayer(d_model,
                         d_inner_hid,
                         n_head,
                         d_k,
                         d_v,
                         dropout=dropout) for _ in range(n_layers)
        ])
Example No. 12
 def __init__(self, d_input, d_model, N, heads, dropout):
     super().__init__()
     self.N = N
     self.layers = get_clones(
         DecoderLayer(d_input, d_model, heads, dropout), N)
     self.norm = nn.LayerNorm(d_model)
Example No. 13
def make_model_ast(src_vocab, tgt_vocab, voc_size, device, kg_embed, ast_embed, N=6, d_model=512, d_ff=2048,
                   d_intermediate=512, h=8, dropout=0.1, embedding_dim=512, hidden_size=512):
    # "????????
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    attn_ent = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    with open(kg_embed, "r", encoding='utf-8') as f:
        lines = json.loads(f.read())
        vecs = list()
        # vecs.append([0] * 100)  # CLS
        for (i, line) in enumerate(lines):
            if line == "ent_embeddings":
                for vec in lines[line]:
                    vec = [float(x) for x in vec]
                    vecs.append(vec)
    embed = torch.FloatTensor(vecs)

    ast_size = voc_size
    path_listtensor = list()
    lstm = LSTM(voc_size, embedding_dim, hidden_size)
    # bilstm = BiLSTM(voc_size, embedding_dim, hidden_size, device)

    # for k in ast_embed.keys():
    #     pathlist = ast_embed[k]
    #     path_tensor = torch.zeros(voc_size).long()
    #     path_tensor = Variable(path_tensor).to(device)
    #     for p in pathlist:
    #         outpath = torch.zeros(voc_size).long()
    #         outpath = Variable(outpath).to(device)
    #         p.unsqueeze(0)
    #         hidden = lstm.init_hidden().to(device)
    #         cell_state = lstm.init_cell_state().to(device)
    #         #cell_state_b = lstm.init_cell_state().to(device)
    #         i = 0
    #         while i < (list(p.size())[0]):
    #             # output, hiddenout, cell_state, cell_state_b = bilstm.forward(p[i], p[list(p.size())[0] - 1 - i], hidden, cell_state, cell_state_b)
    #             output, hidden, cell_state = lstm(p[i], hidden, cell_state)
    #             i += 1
    #             outpath += output
    #         path_tensor += outpath
    #
    #     path_float = list()
    #     for x in path_tensor:
    #         path_float.append(float(x))
    #     path_listtensor.append(path_float)
    # ast_embed = torch.FloatTensor(path_listtensor)

    model = EncoderDecoder4AST(
        Encoder4AST(EncoderLayer4AST(d_model, d_intermediate, ast_size, c(attn), c(attn_ent), c(ff), dropout, voc_size,
                                     embedding_dim, hidden_size, device), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        torch.nn.Embedding.from_pretrained(embed),
        #nn.Sequential(Embeddings(d_model, voc_size)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))

    # From their code: initializing parameters with Glorot / fan_avg is important.
    # Initialize the parameters with a (Xavier) uniform distribution.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model