def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Convert list of sentences (words) into tensor with necessary padding for
    shorter sentences.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU
    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    # Connect `words2charindices()` and `pad_sents_char()` defined in previous parts.
    batch_size = len(sents)
    max_sentence_length = max(len(s) for s in sents)

    word_ids = self.words2charindices(sents)
    # shape = (batch_size, max_sentence_length, max_word_length)
    word_ids_padded = pad_sents_char(word_ids, self.char2id['<pad>'])
    max_word_length = len(word_ids_padded[0][0])

    sents_var = torch.tensor(word_ids_padded, device=device)
    assert_expected_size(sents_var, 'word_ids_padded',
                         [batch_size, max_sentence_length, max_word_length])

    # Reorder (batch, sent_len, word_len) -> (sent_len, batch, word_len).
    sents_var = sents_var.permute(1, 0, 2)
    assert_expected_size(sents_var, 'sents_var',
                         [max_sentence_length, batch_size, max_word_length])
    return sents_var.contiguous()
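# --- Usage sketch (illustrative, not part of the assignment): every word is padded
# to max_word_length and every sentence to max_sentence_length, then the result is
# permuted to (sent_len, batch, word_len). `vocab` is a hypothetical VocabEntry
# instance built elsewhere.
import torch

sents = [['hello', 'world'], ['hi']]
t = vocab.to_input_tensor_char(sents, device=torch.device('cpu'))
print(t.shape)  # torch.Size([2, 2, max_word_length]): 2 words max, batch of 2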
def forward(self, input, dec_hidden=None):
    """ Forward pass of character decoder.

    @param input: tensor of integers, shape (length, batch)
    @param dec_hidden: internal state of the LSTM before reading the input characters.
        A tuple of two tensors of shape (1, batch, hidden_size)

    @returns scores: called s in the PDF, shape (length, batch, self.vocab_size)
    @returns dec_hidden: internal state of the LSTM after reading the input characters.
        A tuple of two tensors of shape (1, batch, hidden_size)
    """
    length, batch_size = input.size()  # length = m_word = number of characters in the word

    # Embed the characters, run the LSTM over them, then project every hidden
    # state to a score vector over the character vocabulary.
    x_embeddings = self.decoderCharEmb(input)
    assert_expected_size(x_embeddings, 'x_embeddings',
                         [length, batch_size, self.char_embedding_size])

    hidden_states, (hn, cn) = self.charDecoder(x_embeddings, dec_hidden)
    assert_expected_size(hidden_states, 'hidden_states', [length, batch_size, self.hidden_size])
    assert_expected_size(hn, 'hn', [1, batch_size, self.hidden_size])
    assert_expected_size(cn, 'cn', [1, batch_size, self.hidden_size])

    scores = self.char_output_projection(hidden_states)
    assert_expected_size(scores, 'scores', [length, batch_size, self.vocab_size])
    return scores, (hn, cn)
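# --- A minimal smoke test for forward(). CharDecoder(hidden_size,
# char_embedding_size, target_vocab) follows the assignment skeleton's constructor;
# `char_vocab` is a hypothetical character vocabulary exposing a char2id dict.
import torch

length, batch = 4, 3
decoder = CharDecoder(hidden_size=8, char_embedding_size=6, target_vocab=char_vocab)
inp = torch.randint(0, len(char_vocab.char2id), (length, batch))
scores, (h, c) = decoder(inp)  # dec_hidden=None -> the LSTM starts from zeros
assert scores.shape == (length, batch, len(char_vocab.char2id))
assert h.shape == c.shape == (1, batch, 8)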
def decode_greedy(self, initialStates, device, max_length=21):
    """ Greedy decoding

    @param initialStates: initial internal state of the LSTM, a tuple of two tensors
        of size (1, batch, hidden_size)
    @param device: torch.device (indicates whether the model is on CPU or GPU)
    @param max_length: maximum length of words to decode

    @returns decodedWords: a list (of length batch) of strings, each of which has
        length <= max_length. The decoded strings should NOT contain the
        start-of-word and end-of-word characters.
    """
    # Curly brackets serve as the start-of-word ('{') and end-of-word ('}')
    # characters; their indices are self.target_vocab.start_of_word and
    # self.target_vocab.end_of_word, respectively.
    batch_size = initialStates[0].size(1)

    # Start every word in the batch with the <START> character.
    current_char_id = torch.tensor([self.target_vocab.start_of_word] * batch_size,
                                   device=device).reshape(1, batch_size)
    assert_expected_size(current_char_id, 'current_char_id', [1, batch_size])

    decodedWords = [''] * batch_size
    dec_hidden = initialStates
    for _ in range(max_length):
        # One LSTM step per character; append the argmax character to each word.
        scores, dec_hidden = self.forward(current_char_id, dec_hidden)
        assert_expected_size(scores, 'scores', [1, batch_size, self.vocab_size])
        current_char_id = torch.argmax(scores, dim=2)
        decodedWords = [d + self.target_vocab.id2char[c.item()]
                        for (c, d) in zip(current_char_id.squeeze(dim=0), decodedWords)]

    # Truncate each word at its first end-of-word character, if one was produced.
    decodedWords = [d[:d.find('}')] if '}' in d else d for d in decodedWords]
    return decodedWords
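# --- Usage sketch: greedy-decode from a zero initial state. Shapes follow the
# docstring above; `decoder` is a CharDecoder as in the previous sketch.
import torch

batch, hidden = 3, decoder.hidden_size
h0 = torch.zeros(1, batch, hidden)
c0 = torch.zeros(1, batch, hidden)
words = decoder.decode_greedy((h0, c0), device=torch.device('cpu'))
print(words)  # a list of 3 strings, each at most max_length characters, no '{' or '}'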
def question_1c_sanity_check():
    ''' Sanity check for highway.py class implementation '''
    print("-" * 80)
    print("Running Sanity Check for Question 1c: Highway")
    print("-" * 80)

    highway = Highway(EMBED_SIZE)
    # Reinitialize weights to known constants so the output is deterministic.
    highway.w_projection.weight.data.fill_(0.3)
    highway.w_projection.bias.data.fill_(0.1)
    highway.w_gate.weight.data.fill_(0.3)
    highway.w_gate.bias.data.fill_(0.1)

    x_conv_out = torch.ones(BATCH_SIZE, EMBED_SIZE)
    output = highway(x_conv_out)
    assert_expected_size(output, 'output', [BATCH_SIZE, EMBED_SIZE])
    print(output)
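# --- The expected output can be worked out by hand: with every weight 0.3, every
# bias 0.1, and an all-ones input of size EMBED_SIZE, each linear unit outputs
# 0.3 * EMBED_SIZE + 0.1. A sketch of the check, reusing EMBED_SIZE from this file:
import torch

s = 0.3 * EMBED_SIZE + 0.1                  # every unit of w_projection and w_gate
g = torch.sigmoid(torch.tensor(s))          # gate value, identical in every position
expected = g * max(s, 0.0) + (1 - g) * 1.0  # relu(s) blended with the all-ones input
# Every entry of `output` above should equal `expected`.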
def train_forward(self, char_sequence, dec_hidden=None):
    """ Forward computation during training.

    @param char_sequence: tensor of integers, shape (length, batch). Note that
        "length" here and in forward() need not be the same.
    @param dec_hidden: initial internal state of the LSTM, obtained from the output
        of the word-level decoder. A tuple of two tensors of shape (1, batch, hidden_size)

    @returns The cross-entropy loss, computed as the *sum* of cross-entropy losses
        of all the words in the batch, for every character in the sequence.
        Padding characters do not contribute to the loss.
    """
    # char_sequence corresponds to x_1 ... x_{n+1} from the handout,
    # e.g. <START>,m,u,s,i,c,<END>.
    length, batch_size = char_sequence.size()  # length = m_word = number of characters in the word

    scores, (hn, cn) = self.forward(char_sequence, dec_hidden)
    assert_expected_size(scores, 'scores', [length, batch_size, self.vocab_size])
    assert_expected_size(hn, 'hn', [1, batch_size, self.hidden_size])
    assert_expected_size(cn, 'cn', [1, batch_size, self.hidden_size])

    loss = nn.CrossEntropyLoss(reduction='sum')
    cross_entropy_loss = 0
    for i in range(batch_size):
        if self.target_vocab.end_of_word in char_sequence[:, i]:
            # Index of the <END> character; .nonzero() returns a 2-D tensor, so
            # pull out a plain int before using it as a slice bound.
            end_id = (char_sequence[:, i] == self.target_vocab.end_of_word).nonzero()[0].item()
            # Scores at positions 0..end_id-1 predict targets 1..end_id; everything
            # after <END> is padding and is excluded from the loss.
            cross_entropy_loss += loss(scores[:end_id, i, :], char_sequence[1:end_id + 1, i])
    return cross_entropy_loss
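# --- A vectorized alternative to the per-word loop above: a sketch of a drop-in
# replacement for the end of train_forward(), assuming every position after <END>
# in char_sequence holds the '<pad>' character, so ignore_index drops it from the sum.
loss_fn = nn.CrossEntropyLoss(reduction='sum',
                              ignore_index=self.target_vocab.char2id['<pad>'])
target = char_sequence[1:]   # predict x_2 .. x_{n+1}
logits = scores[:-1]         # scores produced after reading x_1 .. x_n
cross_entropy_loss = loss_fn(logits.reshape(-1, self.vocab_size), target.reshape(-1))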
def forward(self, x_reshaped: torch.Tensor) -> torch.Tensor:
    '''
    @param x_reshaped (Tensor): Tensor of character embeddings with shape
        (b, e_char, m_word), where b = batch_size, e_char = char embedding size
        and m_word = max_word_length = max characters in a word
    @returns x_conv_out (Tensor): Tensor of word-level features with shape
        (b, e_word), where e_word = word embedding length
    '''
    batch_size, m_word = x_reshaped.size(0), x_reshaped.size(2)
    e_char, e_word, kernel_size = self.char_embed_size, self.word_embed_size, self.kernel_size
    assert_expected_size(x_reshaped, 'x_reshaped', [batch_size, e_char, m_word])

    # 1-D convolution over character positions yields m_word - k + 1 windows.
    x_conv = self.cnn(x_reshaped)
    assert_expected_size(x_conv, 'x_conv', [batch_size, e_word, m_word - kernel_size + 1])

    # ReLU, then max-pool over all window positions to keep one value per filter.
    x_conv_out = nn.functional.relu(x_conv)
    x_conv_out = nn.functional.max_pool1d(x_conv_out, m_word - kernel_size + 1)
    x_conv_out = torch.squeeze(x_conv_out, dim=2)
    assert_expected_size(x_conv_out, 'x_conv_out', [batch_size, e_word])
    return x_conv_out
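# --- Shape check for the CNN block (a sketch; the CNN(char_embed_size,
# word_embed_size, kernel_size) constructor keywords are assumptions mirroring the
# attributes read above):
import torch

e_char, e_word, m_word, k = 6, 10, 21, 5
cnn = CNN(char_embed_size=e_char, word_embed_size=e_word, kernel_size=k)
x = torch.randn(4, e_char, m_word)   # (batch, e_char, m_word)
out = cnn(x)
assert out.shape == (4, e_word)      # one e_word-dimensional vector per word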
def forward(self, x_conv_out: torch.Tensor) -> torch.Tensor:
    '''
    @param x_conv_out (Tensor): Tensor of convolutional network outputs with shape
        (b, e_word), where b = batch_size and e_word = word embedding length
    @returns x_highway (Tensor): Tensor of highway-layer outputs with shape (b, e_word)
    '''
    batch_size, e_word = x_conv_out.size(0), self.word_embed_size
    assert_expected_size(x_conv_out, 'x_conv_out', [batch_size, e_word])

    x_proj = torch.relu(self.w_projection(x_conv_out))  # shape = (b, e_word)
    assert_expected_size(x_proj, 'x_proj', [batch_size, e_word])

    x_gate = torch.sigmoid(self.w_gate(x_conv_out))  # shape = (b, e_word)
    assert_expected_size(x_gate, 'x_gate', [batch_size, e_word])

    # The gate blends the projected features with a skip connection to the input.
    x_highway = x_gate * x_proj + (1 - x_gate) * x_conv_out
    assert_expected_size(x_highway, 'x_highway', [batch_size, e_word])
    return x_highway
def forward(self, input_tensor):
    """ Looks up character-based CNN embeddings for the words in a batch of sentences.

    @param input_tensor: Tensor of integers of shape (sentence_length, batch_size,
        max_word_length) where each integer is an index into the character vocabulary
    @returns output: Tensor of shape (sentence_length, batch_size, embed_size),
        containing the CNN-based embeddings for each word of the sentences in the batch
    """
    sentence_length, batch_size, m_word = input_tensor.size()
    e_char, e_word = self.char_embed_size, self.embed_size

    # Fold the sentence and batch dimensions together so every word is one row.
    x_padded = torch.reshape(input_tensor, (sentence_length * batch_size, m_word))
    assert_expected_size(x_padded, 'x_padded', [sentence_length * batch_size, m_word])

    # Look up character embeddings, then move e_char in front of m_word for Conv1d.
    x_emb = self.embeddings(x_padded)
    assert_expected_size(x_emb, 'x_emb', [sentence_length * batch_size, m_word, e_char])
    x_reshaped = x_emb.permute(0, 2, 1)
    assert_expected_size(x_reshaped, 'x_reshaped', [sentence_length * batch_size, e_char, m_word])

    # CNN -> highway -> dropout produces one e_word-dim embedding per word.
    x_conv_out = self.cnn(x_reshaped)
    assert_expected_size(x_conv_out, 'x_conv_out', [sentence_length * batch_size, e_word])
    x_highway = self.highway(x_conv_out)
    assert_expected_size(x_highway, 'x_highway', [sentence_length * batch_size, e_word])
    x_word_emb = self.dropout(x_highway)
    assert_expected_size(x_word_emb, 'x_word_emb', [sentence_length * batch_size, e_word])

    # Unfold back to (sentence_length, batch_size, e_word).
    output = torch.reshape(x_word_emb, (sentence_length, batch_size, e_word))
    assert_expected_size(output, 'output', [sentence_length, batch_size, e_word])
    return output
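# --- End-to-end sketch: the padded character tensor from to_input_tensor_char()
# feeds straight into these embeddings. `model_embeddings` and `vocab` are
# hypothetical instances; `sents` is the toy batch from the first sketch.
input_tensor = vocab.to_input_tensor_char(sents, device=torch.device('cpu'))
word_embs = model_embeddings(input_tensor)
# (sentence_length, batch_size, embed_size): one dense vector per word, ready for
# the downstream NMT encoder/decoder.
print(word_embs.shape)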