Example #1
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
        """ Init NMT Model.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size, the size of hidden states (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        """
        super(NMT, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab

        # default values
        self.encoder = None
        self.decoder = None
        self.h_projection = None
        self.c_projection = None
        self.att_projection = None
        self.combined_output_projection = None
        self.target_vocab_projection = None
        self.dropout = None
        # For sanity check only, not relevant to implementation
        self.gen_sanity_check = False
        self.counter = 0

        ### YOUR CODE HERE (~8 Lines)
        ### TODO - Initialize the following variables:
        ###     self.encoder (Bidirectional LSTM with bias)
        ###     self.decoder (LSTM Cell with bias)
        ###     self.h_projection (Linear Layer with no bias), called W_{h} in the PDF.
        ###     self.c_projection (Linear Layer with no bias), called W_{c} in the PDF.
        ###     self.att_projection (Linear Layer with no bias), called W_{attProj} in the PDF.
        ###     self.combined_output_projection (Linear Layer with no bias), called W_{u} in the PDF.
        ###     self.target_vocab_projection (Linear Layer with no bias), called W_{vocab} in the PDF.
        ###     self.dropout (Dropout Layer)
        ###
        self.encoder = nn.LSTM(input_size=embed_size,
                               hidden_size=hidden_size,
                               bidirectional=True)
        self.decoder = nn.LSTMCell(input_size=embed_size + hidden_size,
                                   hidden_size=hidden_size)
        self.h_projection = nn.Linear(2 * hidden_size, hidden_size, bias=False)
        self.c_projection = nn.Linear(2 * hidden_size, hidden_size, bias=False)
        self.att_projection = nn.Linear(2 * hidden_size, hidden_size, bias=False)
        self.combined_output_projection = nn.Linear(3 * hidden_size,
                                                    hidden_size, bias=False)
        self.target_vocab_projection = nn.Linear(hidden_size,
                                                 len(self.vocab.tgt), bias=False)
        self.dropout = nn.Dropout(p=self.dropout_rate)
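For context, a minimal usage sketch of how a constructor like this is typically invoked; the vocabulary file name, the Vocab.load helper, and the layer sizes are illustrative assumptions rather than something shown in the example itself.

# Hypothetical usage sketch -- file name, sizes and the Vocab.load helper are assumptions.
from vocab import Vocab  # assignment-style Vocab exposing .src and .tgt sub-vocabularies

vocab = Vocab.load('vocab.json')
model = NMT(embed_size=256, hidden_size=256, vocab=vocab, dropout_rate=0.2)
print(model)  # lists the encoder, decoder, projection and dropout modules registered above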
Example #2
    def __init__(self, vocab, embed_size, hidden_size, output_size, batch_size, dropout_rate=0.2):
        super(ConditionalLSTM, self).__init__()
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size

        self.embedding = ModelEmbeddings(vocab, embed_size)
        self.context = nn.LSTM(embed_size, hidden_size, bidirectional=False)
        self.response = nn.LSTM(embed_size, hidden_size, bidirectional=False)
        self.proj = nn.Linear(hidden_size, output_size, bias=True)
        self.dropout = nn.Dropout(dropout_rate)
        self.softmax = nn.LogSoftmax(dim=1)
        self.hidden = self.init_hidden()
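The constructor above ends by calling self.init_hidden(), which is not shown in the snippet. A minimal sketch of such a helper, under the assumption that it returns zero-initialized hidden and cell states for a single-layer, unidirectional LSTM:

import torch

def init_hidden(self):
    # Hypothetical helper (shown outside the class for brevity) -- assumes one layer,
    # one direction, and zero initialization of both hidden and cell states.
    h0 = torch.zeros(1, self.batch_size, self.hidden_size)
    c0 = torch.zeros(1, self.batch_size, self.hidden_size)
    return (h0, c0)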
Example #3
    def __init__(self,
                 embed_size,
                 hidden_size,
                 vocab,
                 dropout_rate=0.2,
                 use_attention=True):
        """ Init NMT Model.

        @param embed_size (tuple): Embedding size (src, tgt)
        @param hidden_size (tuple): Hidden Size (src, tgt)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        @param use_attention (bool): Whether to apply attention over the encoder states
        """
        super(NMT, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab
        self.use_attention = use_attention

        if self.use_attention:
            print('Use attention', file=sys.stderr)
        else:
            print('Attention not used', file=sys.stderr)

        self.encoder = nn.LSTM(input_size=self.model_embeddings.embed_size[0],
                               hidden_size=self.hidden_size[0],
                               bidirectional=True)
        self.decoder = Att2inCore()
        self.att_projection = nn.Linear(in_features=self.hidden_size[0] * 2,
                                        out_features=self.hidden_size[1],
                                        bias=False)
        self.h_projection = nn.Linear(in_features=self.hidden_size[0] * 2,
                                      out_features=self.hidden_size[1],
                                      bias=False)
        self.c_projection = nn.Linear(in_features=self.hidden_size[0] * 2,
                                      out_features=self.hidden_size[1],
                                      bias=False)
        self.combined_output_projection = nn.Linear(
            in_features=self.hidden_size[0] * 2 + self.hidden_size[1],
            out_features=self.hidden_size[1],
            bias=False)
        self.target_vocab_projection = nn.Linear(
            in_features=self.hidden_size[1],
            out_features=len(self.vocab.tgt) + 1,
            bias=False)
        self.dropout = nn.Dropout(p=self.dropout_rate)

        assert self.decoder.rnn_size == self.hidden_size[1], 'check decoder input dim'
Example #4
    def __init__(self, word_embed_size, hidden_size, vocab, dropout_rate=0.3, no_char_decoder=False):
        """ Init NMT Model.

        @param word_embed_size (int): Embedding size (dimensionality) of word
        @param hidden_size (int): Hidden Size (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        """
        super(NMT, self).__init__()

        self.model_embeddings_source = ModelEmbeddings(word_embed_size, vocab.src)
        self.model_embeddings_target = ModelEmbeddings(word_embed_size, vocab.tgt)

        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab

        ### COPY OVER YOUR CODE FROM ASSIGNMENT 4

        self.encoder = nn.LSTM(input_size=word_embed_size, hidden_size=self.hidden_size,
                               num_layers=1, bidirectional=True, bias=True)
        self.decoder = nn.LSTMCell(input_size=word_embed_size + self.hidden_size,
                                   hidden_size=self.hidden_size, bias=True)

        self.h_projection = nn.Linear(2 * self.hidden_size, self.hidden_size, bias=False)
        self.c_projection = nn.Linear(2 * self.hidden_size, self.hidden_size, bias=False)
        self.att_projection = nn.Linear(2 * self.hidden_size, self.hidden_size, bias=False)

        self.combined_output_projection = nn.Linear(3 * self.hidden_size, self.hidden_size, bias=False)
        self.dropout = nn.Dropout(p=dropout_rate)
        self.target_vocab_projection = nn.Linear(self.hidden_size, len(self.vocab.tgt), bias=False)

        ### END YOUR CODE FROM ASSIGNMENT 4

        if not no_char_decoder:
            self.charDecoder = CharDecoder(hidden_size, target_vocab=vocab.tgt)
        else:
            self.charDecoder = None
Example #5
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
        """ Init NMT Model.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        """
        super(NMT, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab
        self.embed_size = embed_size

        # default values
        self.encoder = None
        self.decoder = None
        self.h_projection = None
        self.c_projection = None
        self.att_projection = None
        self.combined_output_projection = None
        self.target_vocab_projection = None
        self.dropout = None

        ### YOUR CODE HERE (~8 Lines)

        ### TODO - Initialize the following variables:
        self.encoder = nn.LSTM(self.embed_size,
                               self.hidden_size,
                               bidirectional=True)
        self.decoder = nn.LSTMCell(self.hidden_size + self.embed_size,
                                   self.hidden_size)
        self.h_projection = nn.Linear(self.hidden_size * 2,
                                      self.hidden_size,
                                      bias=False)
        self.c_projection = nn.Linear(self.hidden_size * 2,
                                      self.hidden_size,
                                      bias=False)
        self.att_projection = nn.Linear(self.hidden_size * 2,
                                        self.hidden_size,
                                        bias=False)
        self.combined_output_projection = nn.Linear(self.hidden_size * 3,
                                                    self.hidden_size,
                                                    bias=False)
        self.target_vocab_projection = nn.Linear(self.hidden_size,
                                                 len(self.vocab.tgt),
                                                 bias=False)
        self.dropout = nn.Dropout(self.dropout_rate)
Example #6
    def __init__(self, vocab, embed_size, embeddings, sim_scale=5):
        """
        @param vocab (Vocab): vocab object
        @param embed_size (int): embedding size
        @param embeddings (torch.tensor (len(vocab), embed_dim)): pretrained word embeddings
        @param sim_scale (float): scale the sim score by this scalar
        """
        super(AvgSim, self).__init__()
        self.pretrained_embeddings = embeddings
        self.embeddings = ModelEmbeddings(vocab, embed_size, self.pretrained_embeddings)
        self.vocab = vocab
        self.sim_scale = sim_scale

        self.scoring_fn = nn.CosineSimilarity(dim=-1)
Example #7
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
        """ Init NMT Model.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        """
        super(NMT, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab

        # default values
        self.encoder = None
        self.decoder = None
        self.h_projection = None
        self.c_projection = None
        self.att_projection = None
        self.combined_output_projection = None
        self.target_vocab_projection = None
        self.dropout = None

        self.encoder = nn.LSTM(input_size=embed_size,
                               hidden_size=self.hidden_size,
                               bidirectional=True,
                               bias=True)
        self.decoder = nn.LSTMCell(input_size=hidden_size + embed_size,
                                   hidden_size=self.hidden_size,
                                   bias=True)
        self.h_projection = nn.Linear(in_features=2 * self.hidden_size,
                                      out_features=self.hidden_size,
                                      bias=False)
        self.c_projection = nn.Linear(in_features=2 * self.hidden_size,
                                      out_features=self.hidden_size,
                                      bias=False)
        self.att_projection = nn.Linear(in_features=2 * self.hidden_size,
                                        out_features=self.hidden_size,
                                        bias=False)
        self.combined_output_projection = nn.Linear(
            in_features=3 * self.hidden_size,
            out_features=self.hidden_size,
            bias=False)
        self.target_vocab_projection = nn.Linear(in_features=self.hidden_size,
                                                 out_features=len(self.vocab.tgt),
                                                 bias=False)
        self.dropout = nn.Dropout(self.dropout_rate)
Example #8
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
        """ Init NMT Model.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        """
        super(NMT, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab

        # Bidirectional LSTM with bias
        self.encoder = nn.LSTM(input_size=embed_size,
                               hidden_size=self.hidden_size,
                               bidirectional=True,
                               bias=True)
        # LSTM Cell with bias
        self.decoder = nn.LSTMCell(input_size=embed_size + self.hidden_size,
                                   hidden_size=self.hidden_size,
                                   bias=True)
        # Linear Layer with no bias, W_{h}
        self.h_projection = nn.Linear(in_features=self.hidden_size * 2,
                                      out_features=self.hidden_size,
                                      bias=False)
        # Linear Layer with no bias, W_{c}
        self.c_projection = nn.Linear(in_features=self.hidden_size * 2,
                                      out_features=self.hidden_size,
                                      bias=False)
        # Linear Layer with no bias, W_{attProj}
        self.att_projection = nn.Linear(in_features=self.hidden_size * 2,
                                        out_features=self.hidden_size,
                                        bias=False)
        # Linear Layer with no bias, W_{u}
        self.combined_output_projection = nn.Linear(
            in_features=self.hidden_size * 3,
            out_features=self.hidden_size,
            bias=False)
        # Linear Layer with no bias, W_{vocab}
        self.target_vocab_projection = nn.Linear(in_features=self.hidden_size,
                                                 out_features=len(vocab.tgt),
                                                 bias=False)
        # Dropout Layer
        self.dropout = nn.Dropout(p=self.dropout_rate)
Example #9
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
        """Init NMT Model.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size, the size of hidden states (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        """
        super(NMT, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab

        # For sanity check only, not relevant to implementation
        self.gen_sanity_check = False
        self.counter = 0

        self.encoder = torch.nn.LSTM(
            input_size=embed_size,
            hidden_size=self.hidden_size,
            bias=True,
            bidirectional=True,
        )
        self.decoder = torch.nn.LSTMCell(
            input_size=embed_size + hidden_size,
            hidden_size=self.hidden_size,
            bias=True,
        )
        self.h_projection = torch.nn.Linear(
            in_features=2 * self.hidden_size, out_features=self.hidden_size, bias=False
        )
        self.c_projection = torch.nn.Linear(
            in_features=2 * self.hidden_size, out_features=self.hidden_size, bias=False
        )
        self.att_projection = torch.nn.Linear(
            in_features=2 * self.hidden_size, out_features=self.hidden_size, bias=False
        )
        self.combined_output_projection = torch.nn.Linear(
            in_features=3 * self.hidden_size, out_features=self.hidden_size, bias=False
        )
        self.target_vocab_projection = torch.nn.Linear(
            in_features=self.hidden_size, out_features=len(self.vocab.tgt), bias=False
        )
        self.dropout = torch.nn.Dropout(p=self.dropout_rate)
Example #10
 def __init__(self,
              input_size,
              hidden_size,
              vocab,
              fasttext_model,
              device='cpu'):
     super(LSTMModel, self).__init__()
     self.hidden_size = hidden_size
     self.input_size = input_size
     self.vocab = vocab
     self.embedding = ModelEmbeddings(input_size, vocab, fasttext_model,
                                      device)
     self.lstm = nn.LSTM(input_size, hidden_size, bidirectional=True)
     self.linear = nn.Linear(self.hidden_size * 2,
                             self.hidden_size,
                             bias=True)
     self.linear2 = nn.Linear(self.hidden_size, self.hidden_size, bias=True)
     self.attention = Attention(self.hidden_size)
Example #11
 def __init__(self,
              embed_size,
              hidden_size,
              src_vocab: Vocabulary,
              dst_vocab: Vocabulary,
              device,
              dropout_rate=0.2):
     super(NMT, self).__init__()
     self.device = device
     self.model_embeddings = ModelEmbeddings(embed_size, src_vocab,
                                             dst_vocab)
     self.hidden_size = hidden_size
     self.src_vocab = src_vocab
     self.dst_vocab = dst_vocab
     self.dropout_rate = dropout_rate
      # the encoder is a bidirectional LSTM, with bias
     self.encoder = nn.LSTM(input_size=embed_size,
                            hidden_size=hidden_size,
                            bidirectional=True,
                            dropout=dropout_rate,
                            bias=True)
      # the decoder is a unidirectional LSTM cell, with bias
     self.decoder = nn.LSTMCell(
         input_size=embed_size + hidden_size,
          # input feeding: the attention vector is concatenated with the next time step's input,
          # so alignment decisions also take past alignment information into account
         hidden_size=hidden_size,
         bias=True)
      # h_projection and c_projection initialize the decoder hidden state and cell state from the source encoding
     self.h_projection = nn.Linear(hidden_size * 2, hidden_size, bias=False)
     self.c_projection = nn.Linear(hidden_size * 2, hidden_size, bias=False)
      # att_projection maps the source states into the decoder's hidden space (towards the context vector)
     self.att_projection = nn.Linear(hidden_size * 2,
                                     hidden_size,
                                     bias=False)
      # the attention vector is concatenated with the next time step's input and fed into the decoder
     self.combined_output_projection = nn.Linear(hidden_size * 2 +
                                                 hidden_size,
                                                 hidden_size,
                                                 bias=False)
      # maps the decoder output to the target vocabulary
     self.target_vocab_projection = nn.Linear(hidden_size,
                                              len(dst_vocab),
                                              bias=False)
     self.dropout = nn.Dropout(dropout_rate)
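The comments above describe input feeding (the attention vector is concatenated with the next time step's input) and the 2h-to-h projections. A self-contained sketch of a single decoder step along those lines; every tensor name and size below is an illustrative assumption, not code taken from the example.

import torch
import torch.nn as nn

e, h, B, src_len = 32, 64, 4, 7                     # illustrative sizes
decoder = nn.LSTMCell(e + h, h, bias=True)
att_projection = nn.Linear(2 * h, h, bias=False)
combined_output_projection = nn.Linear(3 * h, h, bias=False)

enc_hiddens = torch.randn(B, src_len, 2 * h)        # bidirectional encoder states
enc_hiddens_proj = att_projection(enc_hiddens)      # (B, src_len, h)
y_t = torch.randn(B, e)                             # embedding of the current target word
o_prev = torch.zeros(B, h)                          # previous combined output (input feeding)
dec_state = (torch.zeros(B, h), torch.zeros(B, h))

ybar_t = torch.cat([y_t, o_prev], dim=1)            # (B, e + h), the decoder input
dec_hidden, dec_cell = decoder(ybar_t, dec_state)

e_t = torch.bmm(enc_hiddens_proj, dec_hidden.unsqueeze(2)).squeeze(2)  # attention scores (B, src_len)
alpha_t = torch.softmax(e_t, dim=1)                                    # attention weights
a_t = torch.bmm(alpha_t.unsqueeze(1), enc_hiddens).squeeze(1)          # context vector (B, 2h)

u_t = torch.cat([a_t, dec_hidden], dim=1)           # (B, 3h)
o_t = torch.tanh(combined_output_projection(u_t))   # combined output, fed back in at the next step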
Example #12
    def __init__(self, vocab, embed_size, embeddings, hidden_size,
                 dropout_rate):
        """
        @param vocab (Vocab): vocab object
        @param embed_size (int): embedding size
        @param embeddings (torch.tensor (len(vocab), embed_dim)): pretrained word embeddings
        @param hidden_size (int): hidden size
        @param dropout_rate (float): dropout prob
        """
        super(NeuralModel, self).__init__()
        self.pretrained_embeddings = embeddings
        self.embeddings = ModelEmbeddings(vocab, embed_size,
                                          self.pretrained_embeddings)
        self.vocab = vocab
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate

        self.h_projection = nn.Linear(self.hidden_size * 2,
                                      self.hidden_size,
                                      bias=False)
        self.c_projection = nn.Linear(self.hidden_size * 2,
                                      self.hidden_size,
                                      bias=False)
        self.att_projection = nn.Linear(self.hidden_size * 2,
                                        self.hidden_size,
                                        bias=False)
        self.combined_out_projection = nn.Linear(self.hidden_size * 3,
                                                 self.hidden_size,
                                                 bias=False)
        self.vocab_projection = nn.Linear(self.hidden_size,
                                          len(self.vocab),
                                          bias=False)

        self.dropout = nn.Dropout(self.dropout_rate)

        self.encoder = nn.LSTM(input_size=embed_size,
                               hidden_size=self.hidden_size,
                               bias=True,
                               bidirectional=True)

        self.decoder = nn.LSTMCell(input_size=embed_size + self.hidden_size,
                                   hidden_size=self.hidden_size,
                                   bias=True)
Example #13
def load_dev_data(embed_size=50, dev_perct=1., binary=False):
    M = ModelEmbeddings(embed_size=embed_size)
    X = [
        labeledTree.to_labeled_lines()[0][1].split(" ")
        for labeledTree in data['dev']
    ]
    Y = [labeledTree.to_labeled_lines()[0][0] for labeledTree in data['dev']]

    if binary:
        X = [x for (x, y) in list(zip(X, Y)) if y != 3]
        Y = [1 if y > 3 else 0 for y in Y if y != 3]

    dev_size = int(len(X) * dev_perct)
    X = X[:dev_size]
    Y = Y[:dev_size]
    X = M.embed_sentence(X)

    # dev data doesn't need to be augmented, hence it's already zipped and
    # ready to be passed into model.forward()
    return list(zip(X, Y))
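A brief usage note: load_dev_data reads from a module-level data dict with a 'dev' split of labeled trees, which has to be populated elsewhere. An illustrative call (the fraction and the binary flag are arbitrary assumptions):

# Assumes `data['dev']` has already been loaded elsewhere in the module.
dev_examples = load_dev_data(embed_size=50, dev_perct=0.5, binary=True)
print(len(dev_examples))      # number of (embedded sentence, label) pairs
x0, y0 = dev_examples[0]      # ready to be passed into model.forward()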
Example #14
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
        """ Init NMT Model.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        """
        super(NMT, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab

        # default values
        self.encoder = None
        self.decoder = None
        self.h_projection = None
        self.c_projection = None
        self.att_projection = None
        self.combined_output_projection = None
        self.target_vocab_projection = None
        self.dropout = None

        ###
        ### YOUR CODE HERE (~8 Lines)
        ###
        self.embed_size = embed_size
        self.encoder = nn.LSTM(self.embed_size, self.hidden_size, bias=True, bidirectional=True)  # bidirectional handles the doubled hidden size internally
        self.decoder = nn.LSTMCell(self.hidden_size + embed_size, self.hidden_size, bias=True)  # input is the current word embedding concatenated with the previous combined output
        self.h_projection = nn.Linear(2*self.hidden_size, self.hidden_size, bias=False)  # W_h
        self.c_projection = nn.Linear(2*self.hidden_size, self.hidden_size, bias=False)  # W_c
        self.att_projection = nn.Linear(2*self.hidden_size, self.hidden_size, bias=False)  # W_attProj, applied to the 2h-dimensional encoder states
        self.combined_output_projection = nn.Linear(3*self.hidden_size, self.hidden_size, bias=False)  # W_u
        self.target_vocab_projection = nn.Linear(self.hidden_size, len(self.vocab.tgt), bias=False)  # W_vocab, projects onto the target vocabulary
        self.dropout = nn.Dropout(self.dropout_rate)  # dropout layer
        ###
        ### END YOUR CODE
        ###
Example #15
 def __init__(self,
              vocab,
              embed_size,
              hidden_size,
              enc_bidir,
              attn_size,
              dropout=0.2):
     super(QGModel, self).__init__()
     self.vocab = vocab
     self.args = {
         'embed_size': embed_size,
         'hidden_size': hidden_size,
         'dropout': dropout,
         'enc_bidir': enc_bidir,
         'attn_size': attn_size
     }
     self.embeddings = ModelEmbeddings(embed_size, vocab)
     self.encoder = Encoder(embed_size, hidden_size, dropout, enc_bidir)
     self.decoder_init_hidden_proj = nn.Linear(self.encoder.hidden_size,
                                               hidden_size)
     self.decoder = Decoder(embed_size, hidden_size, attn_size,
                            len(vocab.tgt), dropout)
Example #16
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate):
        super(Node2, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab
        print("vocab.num_labels: ", vocab.num_labels)
        self.num_labels = vocab.num_labels

        self.encoder0 = nn.LSTM(
            input_size=embed_size,
            hidden_size=hidden_size,
            bias=True,
            # dropout=self.dropout_rate,
            bidirectional=True)

        self.encoder1 = nn.LSTM(
            input_size=embed_size,
            hidden_size=hidden_size,
            bias=True,
            # dropout=self.dropout_rate,
            bidirectional=True)

        self.dropout1 = nn.Dropout()

        self.attention_projection = nn.Linear(in_features=2 * hidden_size,
                                              out_features=self.num_labels,
                                              bias=False)
        self.attention_softmax = nn.Softmax(dim=0)
        #         self.labels_projection = nn.Linear(in_features=2*hidden_size,
        #                                           out_features=1,
        #                                           bias=False)
        self.labels_projection = nn.Linear(in_features=2 * hidden_size,
                                           out_features=100,
                                           bias=False)

        self.labels_projection2 = nn.Linear(in_features=100,
                                            out_features=1,
                                            bias=False)
Example #17
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
        """ 初始化 NMT 模型.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size (dimensionality)
        @param vocab (Vocab): 词总述,包括 src 和 tgt
        @param dropout_rate (float): 对注意力的dropout概率
        """
        super(NMT, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab

        # Initialize the layers
        # LSTM layer: takes word embeddings as input and outputs hidden states
        self.encoder = nn.LSTM(embed_size,
                               self.hidden_size,
                               dropout=self.dropout_rate,
                               bidirectional=True)  # bidirectional encoder
        # LSTMCell: input is the word embedding concatenated with the hidden state; outputs a hidden state
        self.decoder = nn.LSTMCell(embed_size + self.hidden_size,
                                   self.hidden_size)  # stepped one time step at a time
        self.h_projection = nn.Linear(self.hidden_size * 2,
                                      self.hidden_size,
                                      bias=False)  # project 2h -> h
        self.c_projection = nn.Linear(self.hidden_size * 2,
                                      self.hidden_size,
                                      bias=False)  # project 2h -> h
        self.att_projection = nn.Linear(self.hidden_size * 2,
                                        self.hidden_size,
                                        bias=False)  # project 2h -> h
        self.combined_output_projection = nn.Linear(self.hidden_size * 3,
                                                    self.hidden_size,
                                                    bias=False)  # project 3h -> h
        self.target_vocab_projection = nn.Linear(self.hidden_size,
                                                 len(self.vocab.tgt),
                                                 bias=False)  # project the output onto the vocabulary
        self.dropout = nn.Dropout(p=self.dropout_rate)
Example #18
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate):
        super(Node, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab
        print("vocab.num_labels: ", vocab.num_labels)
        self.num_labels = vocab.num_labels
        
#         self.encoder = nn.LSTM(input_size=embed_size,
#                                hidden_size=hidden_size, 
#                                bias=True, 
#                                # dropout=self.dropout_rate,
#                                bidirectional=True)
        
        self.first_bilstm = BiLSTM(embed_size=embed_size,
                                    hidden_size=hidden_size,
                                    dropout_rate=dropout_rate,
                                    vocab=vocab)
        self.second_bilstm = BiLSTM(embed_size=embed_size,
                                    hidden_size=hidden_size,
                                    dropout_rate=dropout_rate,
                                    vocab=vocab)
Example #19
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
        """ Init NMT Model.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        """
        super(NMT, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab

        # default values
        self.encoder = None
        self.decoder = None
        self.h_projection = None
        self.c_projection = None
        self.att_projection = None
        self.combined_output_projection = None
        self.target_vocab_projection = None
        self.dropout = None
Example #20
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
        """ Init NMT Model.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        """
        super(NMT, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab

        # default values
        self.encoder = None
        self.decoder = None
        self.h_projection = None
        self.c_projection = None
        self.att_projection = None
        self.combined_output_projection = None
        self.target_vocab_projection = None
        self.dropout = None

        ### YOUR CODE HERE (~8 Lines)
        ### TODO - Initialize the following variables:
        ###     self.encoder (Bidirectional LSTM with bias)
        ###     self.decoder (LSTM Cell with bias)
        ###     self.h_projection (Linear Layer with no bias), called W_{h} in the PDF.
        ###     self.c_projection (Linear Layer with no bias), called W_{c} in the PDF.
        ###     self.att_projection (Linear Layer with no bias), called W_{attProj} in the PDF.
        ###     self.combined_output_projection (Linear Layer with no bias), called W_{u} in the PDF.
        ###     self.target_vocab_projection (Linear Layer with no bias), called W_{vocab} in the PDF.
        ###     self.dropout (Dropout Layer)
        ###
        ### Use the following docs to properly initialize these variables:
        ###     LSTM:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM
        ###     LSTM Cell:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell
        ###     Linear Layer:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.Linear
        ###     Dropout Layer:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout

        # LSTM parameters
        # input_size – The number of expected features in the input x
        # hidden_size – The number of features in the hidden state h
        # num_layers – Number of recurrent layers. E.g., setting num_layers=2 would mean stacking two LSTMs together to form a stacked LSTM, with the second LSTM taking in outputs of the first LSTM and computing the final results. Default: 1
        # bias – If False, then the layer does not use bias weights b_ih and b_hh. Default: True
        # batch_first – If True, then the input and output tensors are provided as (batch, seq, feature). Default: False
        # dropout – If non-zero, introduces a Dropout layer on the outputs of each LSTM layer except the last layer, with dropout probability equal to dropout. Default: 0
        # bidirectional – If True, becomes a bidirectional LSTM. Default: False
        self.encoder = nn.LSTM(input_size=embed_size,
                               hidden_size=hidden_size,
                               num_layers=1,
                               bias=True,
                               bidirectional=True)

        # LSTMCell parameters
        # input_size – The number of expected features in the input x
        # hidden_size – The number of features in the hidden state h
        # bias – If False, then the layer does not use bias weights b_ih and b_hh. Default: True
        self.decoder = nn.LSTMCell(input_size=embed_size + hidden_size,
                                   hidden_size=hidden_size,
                                   bias=True)

        # Linear parameters
        # in_features – size of each input sample
        # out_features – size of each output sample
        # bias – If set to False, the layer will not learn an additive bias. Default: True
        self.h_projection = nn.Linear(in_features=2 * hidden_size,
                                      out_features=hidden_size,
                                      bias=False)
        self.c_projection = nn.Linear(in_features=2 * hidden_size,
                                      out_features=hidden_size,
                                      bias=False)
        self.att_projection = nn.Linear(in_features=2 * hidden_size,
                                        out_features=hidden_size,
                                        bias=False)
        self.combined_output_projection = nn.Linear(in_features=3 * hidden_size,
                                                    out_features=hidden_size,
                                                    bias=False)
        self.target_vocab_projection = nn.Linear(in_features=hidden_size,
                                                 out_features=len(self.vocab.tgt),
                                                 bias=False)

        self.dropout = nn.Dropout(p=dropout_rate)
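The parameter descriptions quoted above cover nn.LSTM and nn.LSTMCell. A small standalone sketch contrasting the two (sizes are illustrative assumptions): nn.LSTM consumes a whole sequence in one call, while nn.LSTMCell handles a single time step and must be driven by an explicit loop, which is why the decoders in these examples use it.

import torch
import torch.nn as nn

e, h, T, B = 16, 32, 5, 3                       # illustrative sizes
lstm = nn.LSTM(input_size=e, hidden_size=h, num_layers=1, bias=True)
cell = nn.LSTMCell(input_size=e, hidden_size=h, bias=True)

seq = torch.randn(T, B, e)                      # (seq_len, batch, features)
out, (h_n, c_n) = lstm(seq)                     # out: (T, B, h), all steps at once

h_t, c_t = torch.zeros(B, h), torch.zeros(B, h)
for x_t in seq:                                 # the cell is driven one time step at a time
    h_t, c_t = cell(x_t, (h_t, c_t))            # h_t: (B, h)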
Example #21
    def __init__(self,
                 embed_size,
                 hidden_size,
                 vocab,
                 dropout_rate=0.2,
                 spectrum_cnn_kernel_size=3,
                 location_attention_window=64,
                 no_char_decoder=False):
        """ Init NMT Model.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        """
        super(NMT, self).__init__()

        # self.voiceCNN = VoiceCNN(embed_size, 5)
        self.location_attention_window = location_attention_window
        self.spectrum_cnn_kernel_size = spectrum_cnn_kernel_size
        self.spectrumCNN = nn.Conv1d(embed_size, embed_size,
                                     self.spectrum_cnn_kernel_size)
        # self.model_embeddings_source = ModelEmbeddings(embed_size, vocab.src)
        self.model_embeddings_target = ModelEmbeddings(embed_size, vocab.tgt)

        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab

        # COPY OVER YOUR CODE FROM ASSIGNMENT 4

        self.encoder = torch.nn.LSTM(embed_size,
                                     hidden_size,
                                     bidirectional=True)
        self.decoder = torch.nn.LSTMCell(embed_size + hidden_size, hidden_size)
        self.h_projection = torch.nn.Linear(2 * hidden_size,
                                            hidden_size,
                                            bias=False)
        self.c_projection = torch.nn.Linear(2 * hidden_size,
                                            hidden_size,
                                            bias=False)
        self.loc_window = 5
        self.loc_att_projection = torch.nn.Linear(embed_size, 1, bias=False)
        self.loc_att_conv1D = nn.Conv1d(self.loc_window, embed_size, 1)
        self.att_projection = torch.nn.Linear(2 * hidden_size,
                                              hidden_size,
                                              bias=False)
        self.combined_output_projection = torch.nn.Linear(3 * hidden_size,
                                                          hidden_size,
                                                          bias=False)
        self.target_vocab_projection = torch.nn.Linear(hidden_size,
                                                       len(vocab.tgt),
                                                       bias=False)
        self.dropout = nn.Dropout(p=dropout_rate)

        # END YOUR CODE FROM ASSIGNMENT 4

        if not no_char_decoder:
            self.charDecoder = CharDecoder(hidden_size, target_vocab=vocab.tgt)
        else:
            self.charDecoder = None
Example #22
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
        """ Init NMT Model.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        """
        super(NMT, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab

        # default values
        self.encoder = None
        self.decoder = None
        self.h_projection = None
        self.c_projection = None
        self.att_projection = None
        self.combined_output_projection = None
        self.target_vocab_projection = None
        self.dropout = None

        # YOUR CODE HERE (~8 Lines)
        # TODO - Initialize the following variables:
        # self.encoder (Bidirectional LSTM with bias)
        # self.decoder (LSTM Cell with bias)
        # self.h_projection (Linear Layer with no bias), called W_{h} in the PDF.
        # self.c_projection (Linear Layer with no bias), called W_{c} in the PDF.
        # self.att_projection (Linear Layer with no bias), called W_{attProj} in the PDF.
        # self.combined_output_projection (Linear Layer with no bias), called W_{u} in the PDF.
        # self.target_vocab_projection (Linear Layer with no bias), called W_{vocab} in the PDF.
        # self.dropout (Dropout Layer)
        ###
        # Use the following docs to properly initialize these variables:
        # LSTM:
        # https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM
        # LSTM Cell:
        # https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell
        # Linear Layer:
        # https://pytorch.org/docs/stable/nn.html#torch.nn.Linear
        # Dropout Layer:
        # https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout

        self.encoder = nn.LSTM(embed_size,
                               self.hidden_size,
                               bias=True,
                               bidirectional=True)

        self.decoder = nn.LSTMCell(
            embed_size + self.hidden_size, self.hidden_size
        )  # embed_size+hidden_size means:concat input with the output of the last step.
        self.h_projection = nn.Linear(
            2 * self.hidden_size, self.hidden_size,
            bias=False)  # used to init the Hidden state of Decoder
        self.c_projection = nn.Linear(
            2 * self.hidden_size, self.hidden_size,
            bias=False)  # used to init the Cell state of Decoder

        self.att_projection = nn.Linear(
            2 * self.hidden_size, self.hidden_size, bias=False
        )  # change Encoder hidden state which is (2h, 1) to (h, 1)

        self.combined_output_projection = nn.Linear(
            3 * self.hidden_size, self.hidden_size,
            bias=False)  # attention sums the encoder hidden states, weighted by the softmax scores

        self.dropout = nn.Dropout(dropout_rate)  # dropout applied to the final combined output state

        self.target_vocab_projection = nn.Linear(
            self.hidden_size,
            len(vocab.tgt),
            bias=False  # project the final state to the vocabulary dimension; softmax then gives each word's probability
        )
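The comment on target_vocab_projection above notes that the combined output is projected to the vocabulary dimension and softmaxed into per-word probabilities. A short sketch of that final step; the sizes, including the vocabulary size, are illustrative assumptions.

import torch
import torch.nn as nn
import torch.nn.functional as F

h, B, vocab_size = 64, 4, 1000                  # illustrative sizes
target_vocab_projection = nn.Linear(h, vocab_size, bias=False)

o_t = torch.randn(B, h)                         # combined output for one time step
log_probs = F.log_softmax(target_vocab_projection(o_t), dim=-1)
print(log_probs.shape)                          # torch.Size([4, 1000]): log P(word) over the target vocabulary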
Example #23
    def __init__(self,
                 embed_size,
                 hidden_size,
                 vocab,
                 dropout_rate=0.2,
                 attention_type='additive_attention',
                 self_attention=False):
        """ Init NMT Model.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        """
        super(NMT, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab
        self.attention_type = attention_type
        self.self_attention = self_attention

        # default values
        self.encoder = None
        self.decoder = None
        self.h_projection = None
        self.c_projection = None
        self.att_projection = None
        self.combined_output_projection = None
        self.target_vocab_projection = None
        self.dropout = None

        ### YOUR CODE HERE (~8 Lines)
        self.encoder = nn.LSTM(input_size=embed_size,
                               hidden_size=hidden_size,
                               num_layers=1,
                               bias=True,
                               bidirectional=True)
        self.decoder = nn.LSTMCell(input_size=embed_size + hidden_size,
                                   hidden_size=hidden_size,
                                   bias=True)
        self.h_projection = nn.Linear(in_features=2 * hidden_size,
                                      out_features=hidden_size,
                                      bias=False)
        self.c_projection = nn.Linear(in_features=2 * hidden_size,
                                      out_features=hidden_size,
                                      bias=False)
        self.att_projection = nn.Linear(in_features=2 * hidden_size,
                                        out_features=hidden_size,
                                        bias=False)
        self.Wproj_1 = nn.Linear(in_features=hidden_size,
                                 out_features=hidden_size,
                                 bias=False)
        self.Wproj_2 = nn.Linear(in_features=hidden_size,
                                 out_features=hidden_size,
                                 bias=False)
        self.scale_V = nn.Linear(in_features=hidden_size,
                                 out_features=1,
                                 bias=False)
        self.tanh = nn.Tanh()
        self.W_Mul = nn.Linear(in_features=hidden_size,
                               out_features=hidden_size,
                               bias=False)
        self.v_self = nn.Linear(in_features=hidden_size,
                                out_features=1,
                                bias=False)
        self.W_self = nn.Linear(in_features=hidden_size,
                                out_features=hidden_size,
                                bias=False)
        self.combined_output_projection = nn.Linear(in_features=3 * hidden_size,
                                                    out_features=hidden_size,
                                                    bias=False)
        self.target_vocab_projection = nn.Linear(in_features=hidden_size,
                                                 out_features=len(vocab.tgt),
                                                 bias=False)
        self.dropout = nn.Dropout(p=dropout_rate)
Example #24
    def __init__(self,
                 embed_size,
                 hidden_size,
                 vocab,
                 dropout_rate=0.2,
                 no_char_decoder=False):
        """ Init NMT Model.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        """
        super(NMT, self).__init__()

        self.model_embeddings_source = ModelEmbeddings(embed_size, vocab.src)
        self.model_embeddings_target = ModelEmbeddings(embed_size, vocab.tgt)

        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab
        self.embed_size = embed_size

        ### COPY OVER YOUR CODE FROM ASSIGNMENT 4

        # LSTM is an RNN
        self.encoder = nn.LSTM(input_size=embed_size,
                               hidden_size=hidden_size,
                               bidirectional=True,
                               bias=True)

        # LSTMCell is just one Cell
        self.decoder = nn.LSTMCell(input_size=embed_size + hidden_size,
                                   hidden_size=hidden_size,
                                   bias=True)

        self.h_projection = nn.Linear(in_features=2 * hidden_size,
                                      out_features=hidden_size,
                                      bias=False)
        self.c_projection = nn.Linear(in_features=2 * hidden_size,
                                      out_features=hidden_size,
                                      bias=False)
        self.att_projection = nn.Linear(in_features=2 * hidden_size,
                                        out_features=hidden_size,
                                        bias=False)
        self.combined_output_projection = nn.Linear(in_features=3 * hidden_size,
                                                    out_features=hidden_size,
                                                    bias=False)
        self.target_vocab_projection = nn.Linear(in_features=hidden_size,
                                                 out_features=len(vocab.tgt),
                                                 bias=False)
        self.dropout = nn.Dropout(p=dropout_rate)

        ### END YOUR CODE FROM ASSIGNMENT 4

        if not no_char_decoder:
            self.charDecoder = CharDecoder(hidden_size, target_vocab=vocab.tgt)
        else:
            self.charDecoder = None
Example #25
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
        """ Init NMT Model.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        """
        super(NMT, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab

        # default values
        self.encoder = None
        self.decoder = None
        self.h_projection = None
        self.c_projection = None
        self.att_projection = None
        self.combined_output_projection = None
        self.target_vocab_projection = None
        self.dropout = None

        ### YOUR CODE HERE (~8 Lines)
        ### TODO - Initialize the following variables:
        ###     self.encoder (Bidirectional LSTM with bias)
        ###     self.decoder (LSTM Cell with bias)
        ###     self.h_projection (Linear Layer with no bias), called W_{h} in the PDF.
        ###     self.c_projection (Linear Layer with no bias), called W_{c} in the PDF.
        ###     self.att_projection (Linear Layer with no bias), called W_{attProj} in the PDF.
        ###     self.combined_output_projection (Linear Layer with no bias), called W_{u} in the PDF.
        ###     self.target_vocab_projection (Linear Layer with no bias), called W_{vocab} in the PDF.
        ###     self.dropout (Dropout Layer)
        ###
        ### Use the following docs to properly initialize these variables:
        ###     LSTM:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM
        ###     LSTM Cell:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell
        ###     Linear Layer:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.Linear
        ###     Dropout Layer:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout

        ### (c) Initialize the layers as required by the TODO above

        ## LSTM layer; its constructor arguments are:
        ## input_size: input dimensionality (i.e. the word embedding size)
        ## hidden_size: hidden state dimensionality (the size of h)
        ## num_layers: number of stacked LSTM layers
        ## bias: whether to use bias terms, default True
        ## batch_first: whether the batch dimension comes first in input/output tensors, default False
        ## dropout: dropout between stacked layers, default 0
        ## bidirectional: whether the LSTM is bidirectional, default False
        self.encoder = nn.LSTM(embed_size,
                               self.hidden_size,
                               bias=True,
                               bidirectional=True)

        ## LSTMCell: a single LSTM cell (structurally equivalent to one LSTM layer)
        ## (unlike nn.LSTM, the LSTMCell input is a single x_t rather than the sequence x_0...x_T;
        ##  running a whole sequence through an LSTMCell requires an explicit loop)
        ## Its constructor arguments are:
        ## input_size: input dimensionality (i.e. the word embedding size)
        ## hidden_size: hidden state dimensionality (the size of h)
        ## bias: whether to use bias terms, default True
        self.decoder = nn.LSTMCell(embed_size + self.hidden_size,
                                   self.hidden_size,
                                   bias=True)

        ## Linear layers; the arguments are input dimension, output dimension, and whether to use a bias
        self.h_projection = nn.Linear(2 * self.hidden_size,
                                      self.hidden_size,
                                      bias=False)
        self.c_projection = nn.Linear(2 * self.hidden_size,
                                      self.hidden_size,
                                      bias=False)
        self.att_projection = nn.Linear(2 * self.hidden_size,
                                        self.hidden_size,
                                        bias=False)
        self.combined_output_projection = nn.Linear(3 * self.hidden_size,
                                                    self.hidden_size,
                                                    bias=False)
        self.target_vocab_projection = nn.Linear(self.hidden_size,
                                                 len(self.vocab.tgt),
                                                 bias=False)

        ## Dropout layer with probability dropout_rate
        self.dropout = nn.Dropout(dropout_rate)
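The comments above walk through the nn.LSTM and nn.LSTMCell constructor arguments. A quick shape sanity check for the bidirectional encoder and the 2h-to-h h_projection it feeds; all sizes are illustrative assumptions.

import torch
import torch.nn as nn

e, h, T, B = 32, 64, 10, 4                      # embed size, hidden size, seq len, batch
encoder = nn.LSTM(e, h, bias=True, bidirectional=True)
h_projection = nn.Linear(2 * h, h, bias=False)

x = torch.randn(T, B, e)                        # (seq_len, batch, embed)
enc_out, (h_n, c_n) = encoder(x)                # enc_out: (T, B, 2h); h_n: (2, B, h)
init_h = h_projection(torch.cat([h_n[0], h_n[1]], dim=1))  # concat fwd/bwd final states: (B, 2h) -> (B, h)
print(enc_out.shape, init_h.shape)              # torch.Size([10, 4, 128]) torch.Size([4, 64])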
Example #26
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
        """ Init NMT Model.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        """
        super(NMT, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab

        # default values
        self.encoder = None
        self.decoder = None
        self.h_projection = None
        self.c_projection = None
        self.att_projection = None
        self.combined_output_projection = None
        self.target_vocab_projection = None
        self.dropout = None

        ### YOUR CODE HERE (~8 Lines)
        ### TODO - Initialize the following variables:
        ###     self.encoder (Bidirectional LSTM with bias)
        ###     self.decoder (LSTM Cell with bias)
        ###     self.h_projection (Linear Layer with no bias), called W_{h} in the PDF.
        ###     self.c_projection (Linear Layer with no bias), called W_{c} in the PDF.
        ###     self.att_projection (Linear Layer with no bias), called W_{attProj} in the PDF.
        ###     self.combined_output_projection (Linear Layer with no bias), called W_{u} in the PDF.
        ###     self.target_vocab_projection (Linear Layer with no bias), called W_{vocab} in the PDF.
        ###     self.dropout (Dropout Layer)
        ###
        ### Use the following docs to properly initialize these variables:
        ###     LSTM:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM
        ###     LSTM Cell:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell
        ###     Linear Layer:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.Linear
        ###     Dropout Layer:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout
        """
        编码采用双向LSTM,输入为embed_size,隐藏层长度默认为hidden_size,bidirectional是双向LSTM。
        解码采用单项LSTM,因为输入的时候需要输出前一次结果(长度为embedding_size)和隐藏层结果(hidden_size)。
        """
        self.encoder = nn.LSTM(input_size=embed_size,
                               hidden_size=self.hidden_size,
                               bidirectional=True,
                               bias=True)
        self.decoder = nn.LSTMCell(input_size=embed_size + self.hidden_size,
                                   hidden_size=self.hidden_size,
                                   bias=True)
        """
        得到h_{0},双向LSTM编码后隐藏层的结果拼接后长度为2h,经过一个W_{h}或者W_{c}的权重矩阵处理后变为h,再输入到解码器中。
        """
        self.h_projection = nn.Linear(in_features=self.hidden_size * 2,
                                      out_features=self.hidden_size,
                                      bias=False)
        self.c_projection = nn.Linear(in_features=self.hidden_size * 2,
                                      out_features=self.hidden_size,
                                      bias=False)
        """
        隐藏层在输入过程中会通过attention机制处理,得到a_{t}。
        """
        self.att_projection = nn.Linear(in_features=self.hidden_size * 2,
                                        out_features=self.hidden_size,
                                        bias=False)

        self.combined_output_projection = nn.Linear(
            in_features=self.hidden_size * 3,
            out_features=self.hidden_size,
            bias=False)
        self.target_vocab_projection = nn.Linear(in_features=self.hidden_size,
                                                 out_features=len(
                                                     self.vocab.tgt),
                                                 bias=False)
        self.dropout = nn.Dropout(p=self.dropout_rate)
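
A minimal sketch (not part of the original example) of how the projections above are typically used: the bidirectional encoder's final forward and backward states are concatenated to length 2h and projected back to h to initialize the decoder. The names `source_embeddings` (a (src_len, b, e) tensor) and the surrounding encode method are assumptions for illustration.

# Hypothetical fragment, assumed to live inside an encode() method of the model above.
enc_hiddens, (last_hidden, last_cell) = self.encoder(source_embeddings)
# last_hidden / last_cell: (2, b, h) -> concatenate the two directions -> (b, 2h)
init_decoder_hidden = self.h_projection(torch.cat((last_hidden[0], last_hidden[1]), dim=1))
init_decoder_cell = self.c_projection(torch.cat((last_cell[0], last_cell[1]), dim=1))
dec_init_state = (init_decoder_hidden, init_decoder_cell)  # fed to the LSTMCell decoder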
Example #27
0
    def __init__(self,
                 embed_size,
                 hidden_size,
                 vocab,
                 dropout_rate=0.2,
                 no_char_decoder=False,
                 nmt_model=None):
        """ Init NMT Model.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        @param nmt_model (NMT): a5 NMT Model (without DPP) to initialize layers with
        """
        super(DPPNMT, self).__init__()
        if nmt_model is None:
            self.model_embeddings_source = ModelEmbeddings(
                embed_size, vocab.src)
            self.model_embeddings_target = ModelEmbeddings(
                embed_size, vocab.tgt)

            self.hidden_size = hidden_size
            self.dropout_rate = dropout_rate
            self.vocab = vocab
            self.embed_size = embed_size

            self.encoder = nn.LSTM(embed_size, hidden_size, bidirectional=True)
            self.decoder = nn.LSTMCell(embed_size + hidden_size, hidden_size)

            self.h_projection = nn.Linear(hidden_size * 2,
                                          hidden_size,
                                          bias=False)
            self.c_projection = nn.Linear(hidden_size * 2,
                                          hidden_size,
                                          bias=False)
            self.att_projection = nn.Linear(hidden_size * 2,
                                            hidden_size,
                                            bias=False)
            self.combined_output_projection = nn.Linear(hidden_size * 2 +
                                                        hidden_size,
                                                        hidden_size,
                                                        bias=False)
            self.target_vocab_projection = nn.Linear(hidden_size,
                                                     len(vocab.tgt),
                                                     bias=False)
            self.dropout = nn.Dropout(self.dropout_rate)

            if not no_char_decoder:
                self.charDecoder = CharDecoder(hidden_size,
                                               target_vocab=vocab.tgt)
            else:
                self.charDecoder = None
        else:
            self.model_embeddings_source = nmt_model.model_embeddings_source
            self.model_embeddings_target = nmt_model.model_embeddings_target

            self.hidden_size = nmt_model.hidden_size
            self.dropout_rate = nmt_model.dropout_rate
            self.vocab = nmt_model.vocab
            self.embed_size = nmt_model.model_embeddings_source.embed_size

            self.encoder = nmt_model.encoder
            self.decoder = nmt_model.decoder

            self.h_projection = nmt_model.h_projection
            self.c_projection = nmt_model.c_projection
            self.att_projection = nmt_model.att_projection
            self.combined_output_projection = nmt_model.combined_output_projection
            self.target_vocab_projection = nmt_model.target_vocab_projection
            self.dropout = nmt_model.dropout

            self.charDecoder = nmt_model.charDecoder
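
A hedged usage sketch (not from the original source): the constructor above either builds fresh layers or shares every layer with an already-trained NMT model passed via nmt_model. Here `vocab` is assumed to be a loaded Vocab object and the sizes are illustrative.

base_nmt = NMT(embed_size=256, hidden_size=256, vocab=vocab)   # plain NMT model
dpp_fresh = DPPNMT(256, 256, vocab)                            # initializes its own layers
dpp_shared = DPPNMT(256, 256, vocab, nmt_model=base_nmt)       # reuses base_nmt's layers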
Example #28
0
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
        """ Init NMT Model.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size, the size of hidden states (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        """
        super(NMT, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab

        # default values
        self.encoder = None 
        self.decoder = None
        self.h_projection = None
        self.c_projection = None
        self.att_projection = None
        self.combined_output_projection = None
        self.target_vocab_projection = None
        self.dropout = None
        # For sanity check only, not relevant to implementation
        self.gen_sanity_check = False
        self.counter = 0


        ### YOUR CODE HERE (~8 Lines)
        ### TODO - Initialize the following variables:
        ###     self.encoder (Bidirectional LSTM with bias)
        ###     self.decoder (LSTM Cell with bias)
        ###     self.h_projection (Linear Layer with no bias), called W_{h} in the PDF.
        ###     self.c_projection (Linear Layer with no bias), called W_{c} in the PDF.
        ###     self.att_projection (Linear Layer with no bias), called W_{attProj} in the PDF.
        ###     self.combined_output_projection (Linear Layer with no bias), called W_{u} in the PDF.
        ###     self.target_vocab_projection (Linear Layer with no bias), called W_{vocab} in the PDF.
        ###     self.dropout (Dropout Layer)
        ###
        ### Use the following docs to properly initialize these variables:
        ###     LSTM:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM
        ###     LSTM Cell:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell
        ###     Linear Layer:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.Linear
        ###     Dropout Layer:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout
        # No dropout kwarg on the encoder: a single-layer nn.LSTM ignores it (and warns);
        # dropout is applied through the separate nn.Dropout layer below.
        self.encoder = nn.LSTM(
            input_size=embed_size,
            hidden_size=self.hidden_size,
            bidirectional=True,
            bias=True,
            )
        # hdec_t, cdec_t = Decoder(\bar y_t, hdec_t-1, cdec_t-1)
        # \bar y_t has size of (e+h, 1)
        self.decoder = nn.LSTMCell(
            input_size=embed_size + self.hidden_size, 
            hidden_size=self.hidden_size, 
            bias=True,
            )
        self.h_projection = nn.Linear(
            in_features=2*self.hidden_size,
            out_features=self.hidden_size,
            bias=False,
        )
        self.c_projection = nn.Linear(
            in_features=2*self.hidden_size,
            out_features=self.hidden_size,
            bias=False,
        )
        self.att_projection = nn.Linear(
            in_features=2*self.hidden_size,
            out_features=self.hidden_size,
            bias=False,
        )
        self.combined_output_projection = nn.Linear(
            in_features=3*self.hidden_size,
            out_features=self.hidden_size,
            bias=False,
        )
        self.target_vocab_projection = nn.Linear(
            in_features=self.hidden_size,
            out_features=len(self.vocab.tgt),
            bias=False,
        )
        self.dropout = nn.Dropout(self.dropout_rate)
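
A minimal per-step sketch (assumed names and shapes, not part of the original example) matching the decoder comment above: y_t is the (b, e) embedding of the current target word and o_prev is the (b, h) combined output from the previous step.

Ybar_t = torch.cat((y_t, o_prev), dim=1)                             # (b, e + h), decoder input
dec_hidden, dec_cell = self.decoder(Ybar_t, (dec_hidden, dec_cell))  # one LSTMCell step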
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
        """ Init NMT Model.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        """
        super(NMT, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab

        # default values
        self.encoder = None
        self.decoder = None
        self.h_projection = None
        self.c_projection = None
        self.att_projection = None
        self.combined_output_projection = None
        self.target_vocab_projection = None
        self.dropout = None

        ### YOUR CODE HERE (~8 Lines)
        ### TODO - Initialize the following variables:
        ###     self.encoder (Bidirectional LSTM with bias)
        ###     self.decoder (LSTM Cell with bias)
        ###     self.h_projection (Linear Layer with no bias), called W_{h} in the PDF.
        ###     self.c_projection (Linear Layer with no bias), called W_{c} in the PDF.
        ###     self.att_projection (Linear Layer with no bias), called W_{attProj} in the PDF.
        ###     self.combined_output_projection (Linear Layer with no bias), called W_{u} in the PDF.
        ###     self.target_vocab_projection (Linear Layer with no bias), called W_{vocab} in the PDF.
        ###     self.dropout (Dropout Layer)
        ###
        ### Use the following docs to properly initialize these variables:
        ###     LSTM:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM
        ###     LSTM Cell:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell
        ###     Linear Layer:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.Linear
        ###     Dropout Layer:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout

        # No dropout kwarg on the encoder: a single-layer nn.LSTM ignores it (and warns);
        # dropout is applied through the separate nn.Dropout layer below.
        self.encoder = nn.LSTM(embed_size,
                               self.hidden_size,
                               bidirectional=True)
        self.decoder = nn.LSTMCell(embed_size + self.hidden_size,
                                   self.hidden_size)
        self.h_projection = nn.Linear(2 * self.hidden_size,
                                      self.hidden_size,
                                      bias=False)
        self.c_projection = nn.Linear(2 * self.hidden_size,
                                      self.hidden_size,
                                      bias=False)
        self.att_projection = nn.Linear(hidden_size * 2,
                                        hidden_size,
                                        bias=False)
        self.combined_output_projection = nn.Linear(3 * self.hidden_size,
                                                    self.hidden_size,
                                                    bias=False)
        self.target_vocab_projection = nn.Linear(hidden_size,
                                                 len(vocab.tgt),
                                                 bias=False)
        self.dropout = nn.Dropout(self.dropout_rate)
Example #30
0
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
        """ Init NMT Model.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        """
        super(NMT, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab

        # default values
        self.encoder = None
        self.decoder = None
        self.h_projection = None
        self.c_projection = None
        self.att_projection = None
        self.combined_output_projection = None
        self.target_vocab_projection = None
        self.dropout = None

        ### TODO - Initialize the following variables:
        ###     self.encoder (Bidirectional LSTM with bias)
        ###     self.decoder (LSTM Cell with bias)
        ###     self.h_projection (Linear Layer with no bias), called W_{h} in the PDF.
        ###     self.c_projection (Linear Layer with no bias), called W_{c} in the PDF.
        ###     self.att_projection (Linear Layer with no bias), called W_{attProj} in the PDF.
        ###     self.combined_output_projection (Linear Layer with no bias), called W_{u} in the PDF.
        ###     self.target_vocab_projection (Linear Layer with no bias), called W_{vocab} in the PDF.
        ###     self.dropout (Dropout Layer)

        # The encoder is fed the word embeddings of the source sentence and yields hidden
        # states and cell states for both the forward and backward LSTMs.
        self.encoder = nn.LSTM(input_size=embed_size,
                               hidden_size=hidden_size,
                               bidirectional=True,
                               bias=True)

        # The decoder is initialized with a linear projection of the encoder's final hidden
        # state and final cell state, and is fed the matching target-sentence word embeddings.
        self.decoder = nn.LSTMCell(input_size=embed_size + hidden_size,
                                   hidden_size=hidden_size,
                                   bias=True)

        self.h_projection = nn.Linear(in_features=hidden_size * 2,
                                      out_features=hidden_size,
                                      bias=False)
        self.c_projection = nn.Linear(in_features=hidden_size * 2,
                                      out_features=hidden_size,
                                      bias=False)
        self.att_projection = nn.Linear(in_features=hidden_size * 2,
                                        out_features=hidden_size,
                                        bias=False)

        # transformation of decoder hidden states and context vectors before reading out target words
        # this produces the `attentional vector` in (Luong et al., 2015)
        self.combined_output_projection = nn.Linear(
            in_features=hidden_size * 2 + hidden_size,
            out_features=hidden_size,
            bias=False)

        # prediction layer of the target vocabulary
        self.target_vocab_projection = nn.Linear(in_features=hidden_size,
                                                 out_features=len(vocab.tgt),
                                                 bias=False)
        self.dropout = nn.Dropout(dropout_rate)
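
A minimal sketch (assumed names and shapes, not part of the original example) of how the last three layers combine at each decoder step to produce the Luong-style attentional vector mentioned above: dec_hidden is the (b, h) decoder state and a_t is the (b, 2h) attention-weighted sum of encoder states.

U_t = torch.cat((dec_hidden, a_t), dim=1)                            # (b, 3h)
V_t = self.combined_output_projection(U_t)                           # (b, h)
O_t = self.dropout(torch.tanh(V_t))                                  # attentional vector (Luong et al., 2015)
P_t = torch.log_softmax(self.target_vocab_projection(O_t), dim=-1)   # (b, len(vocab.tgt))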