def question_1h_sanity_check(model):
    """ Sanity check for character padding plus a Highway forward pass.

    Builds the padded character tensor for a small batch of sentences, loads
    the stored gold padding, runs one forward pass through a Highway layer and
    prints the gold and computed padding sizes so they can be compared by eye.
    Nothing is asserted about the padding values.

    @param model: object exposing a `device` attribute; the device is forwarded
        to `vocab.to_input_tensor_char`.
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1h: Padding")
    print("-" * 80)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [['Human:', 'What', 'do', 'we', 'want?'],
                 ['Computer:', 'Natural', 'language', 'processing!'],
                 ['Human:', 'When', 'do', 'we', 'want', 'it?'],
                 ['Computer:', 'When', 'do', 'we', 'want', 'what?']]

    # Called only so that a failure in the index conversion surfaces here;
    # the returned indices are not used below.
    vocab.words2charindices(sentences)

    padded_sentences = vocab.to_input_tensor_char(sentences, model.device)
    gold_padded_sentences = torch.load(
        './sanity_check_en_es_data/gold_padded_sentences.pkl')

    # Test with batch size 1
    x = torch.rand(1, 1, 21)
    hw = Highway(21, 21, 21, 0.5)
    hw.forward(x)

    # Test with batch size 4
    # The original printed the size of an undefined name `a`, which raised a
    # NameError. The gold padding was the only loaded-but-unused reference, so
    # its size is reported instead.
    # NOTE(review): presumably the gold file holds a rectangular nested list or
    # a tensor, and its axis order may differ from the tensor built above --
    # confirm before turning these prints into an assertion.
    gold_size = torch.as_tensor(gold_padded_sentences).size()
    print(gold_size)
    print(padded_sentences.size())

    print("Sanity Check Passed for Question 1h: Padding!")
    print("-" * 80)
def __init__(self, word_embed_size, vocab):
    """ Build the character-based embedding pipeline for one language.

    @param word_embed_size (int): Embedding size (dimensionality) for the output word
    @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
    """
    super(ModelEmbeddings, self).__init__()

    # The A4 version used a word-level lookup table keyed on vocab.src; here the
    # padding index comes from the character table instead.
    pad_token_idx = vocab.char2id['∏']

    # Fixed hyper-parameters of this layer.
    char_embed_size = 50
    window_size = 5
    p_dropout = 0.3

    self.char_embedding = nn.Embedding(len(vocab.char2id),
                                       char_embed_size,
                                       padding_idx=pad_token_idx)
    self.cnn = CNN(char_embed_size=char_embed_size,
                   word_embed_size=word_embed_size,
                   window_size=window_size)
    self.highway = Highway(word_embed_size=word_embed_size)
    self.dropout = nn.Dropout(p_dropout)
    self.word_embed_size = word_embed_size
def __init__(self, embed_size, vocab):
    """ Set up the character embedding, CNN, highway and dropout layers for one language.

    @param embed_size (int): Embedding size (dimensionality) for the output
    @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
    """
    super(ModelEmbeddings, self).__init__()

    # Why the A4 word-level table cannot be reused as-is:
    #   1. embed_size now describes the output, so the character table gets
    #      its own width (e_char = 50).
    #   2. A VocabEntry object has no `src` attribute; the pad index is read
    #      from `char2id` instead.
    pad_token_idx = vocab.char2id['<pad>']
    embed_size_char = 50

    self.char_embedding = nn.Embedding(num_embeddings=len(vocab.char2id),
                                       embedding_dim=embed_size_char,
                                       padding_idx=pad_token_idx)
    self.convNN = CNN(f=embed_size)
    self.highway = Highway(embed_size=embed_size)
    self.dropout = nn.Dropout(0.3)
def __init__(self, embed_size, vocab):
    """ Create the layers that turn character indices into word embeddings.

    @param embed_size (int): Embedding size (dimensionality) for the output
    @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
    """
    super(ModelEmbeddings, self).__init__()

    self.embed_size = embed_size
    self.vocab = vocab
    self.e_char = 50  # width of each character embedding

    char_vocab_size = len(vocab.char2id)
    pad_idx = vocab.char2id['<pad>']
    self.embeddings = nn.Embedding(num_embeddings=char_vocab_size,
                                   embedding_dim=self.e_char,
                                   padding_idx=pad_idx)
    self.cnn = CNN(self.e_char,
                   k=5,
                   m_word=21,
                   output_channels=self.embed_size)
    self.highway = Highway(self.embed_size)
    self.dropout = nn.Dropout(0.3)
def __init__(self, embed_size, vocab):
    """ Create the character embedding, highway, CNN and dropout layers.

    @param embed_size (int): Embedding size (dimensionality) for the output
    @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
    """
    super(ModelEmbeddings, self).__init__()

    self.embed_size = embed_size
    self.e_char = 50          # character embedding width
    self.e_word = embed_size  # word embedding width

    pad_token_idx = vocab.char2id['<pad>']
    self.char_embed = nn.Embedding(len(vocab.char2id),
                                   self.e_char,
                                   padding_idx=pad_token_idx)
    self.vocab = vocab

    # Sub-modules are created in the same order as before (highway first) so
    # registration order and random initialisation are unchanged.
    self.highway = Highway(embed_size)
    self.cnn = CNN(input_channel_count=self.e_char,
                   output_channel_count=self.e_word)
    self.dropout = nn.Dropout(0.3)
def high_sanity_check():
    """ Sanity check for the Highway network forward pass.

    Computes a highway output by hand from random weights, copies the same
    weights into a Highway module and asserts that the module reproduces the
    hand-computed values (up to floating-point tolerance).
    """
    print ("-"*80)
    print("Running Sanity Check for Question 1h: Highway network implementation")
    print ("-"*80)

    # Compute the network manually. Tensors are drawn in the same order as
    # before so a fixed seed yields the same values.
    x = torch.rand(BATCH_SIZE, EMBED_SIZE)
    proj_w = torch.rand(EMBED_SIZE, EMBED_SIZE)
    proj_b = torch.rand(EMBED_SIZE)
    gate_w = torch.rand(EMBED_SIZE, EMBED_SIZE)
    gate_b = torch.rand(EMBED_SIZE)

    # Work in (EMBED_SIZE, BATCH_SIZE) layout; biases broadcast over the batch.
    x_t = x.transpose(0, 1)
    x_proj = F.relu(torch.mm(proj_w, x_t) + proj_b.unsqueeze(1))
    x_gate = torch.sigmoid(torch.mm(gate_w, x_t) + gate_b.unsqueeze(1))

    # The skip branch must carry the *input*, not the projection; the previous
    # reference used x_proj on both branches, which collapses to x_proj.
    x_highway = x_gate * x_proj + (1 - x_gate) * x_t
    x_highway = x_highway.transpose(1, 0)

    hw = Highway(EMBED_SIZE)
    # Initialize the weights of the network the same as the manual one.
    hw.proj_layer.weight = torch.nn.Parameter(proj_w)
    hw.proj_layer.bias = torch.nn.Parameter(proj_b)
    hw.gate_layer.weight = torch.nn.Parameter(gate_w)
    hw.gate_layer.bias = torch.nn.Parameter(gate_b)
    hw.eval()

    output = hw(x)
    # allclose rather than equal: the module and the manual path may order the
    # floating-point operations differently.
    assert torch.allclose(output, x_highway), \
        "Highway output is incorrect: it should be:\n {} but is:\n{}".format(x_highway, output)

    print("Sanity Check Passed for Question 1h: High network implementation!")
    print("-"*80)
def question_1h_sanity_check():
    """ Sanity check for initializing and forwarding in the Highway network.

    Feeds a random tensor and an all-ones tensor through two freshly built
    Highway layers and asserts that each output has the shape of its input.
    """
    print("-" * 80)
    print(
        "Running Sanity Check for Question 1h: initializing and forwarding in Highway network"
    )
    print("-" * 80)

    X_conv_out = torch.randn((5, 4, 3))
    highway = Highway(X_conv_out.size(-1))
    X_highway = highway(X_conv_out)
    assert X_conv_out.size() == X_highway.size(), \
        "Output size should be: {}\n but is {}\n".format(X_conv_out.size(), X_highway.size())

    X_conv_out_2 = torch.ones((5, 4, 3))
    highway_2 = Highway(X_conv_out_2.size(-1))
    X_highway_2 = highway_2(X_conv_out_2)
    # Compare and report the second input/output pair; the previous version
    # referenced the first pair here.
    assert X_conv_out_2.size() == X_highway_2.size(), \
        "Assertion 2: output size should be: {}\n but is {}\n".format(X_conv_out_2.size(), X_highway_2.size())

    print(X_highway_2)
    print("Shape is right!")
    print(
        "Sanity Check Passed for Question 1h: Initializing and Forwarding in Highway Network!"
    )
    print("-" * 80)
def question_1f_sanity_check():
    """ Sanity check for Highway Class init and forward methods.

    Passes a fixed 4 x 3 tensor through a Highway layer and asserts that the
    output shape is [4, 3].
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1f: Highway layer")
    print("-" * 80)

    print("Running test on a list of out conv layers")
    B = 4
    e_word = 3
    conv_out = torch.tensor([[1, 2, 3],
                             [4, 5, 6],
                             [7, 8, 9],
                             [10, 11, 12]], dtype=torch.float)  # 4 * 3
    high_layer = Highway(e_word)
    my_high = high_layer.forward(conv_out)

    output_expected_size = [B, e_word]
    assert list(my_high.size()) == output_expected_size, \
        "output shape is incorrect: it should be:\n {} but is:\n{}".format(
            output_expected_size, list(my_high.size()))

    # The success message previously named Question 1e.
    print("Sanity Check Passed for Question 1f: Correct Output Shape!")
    print("-" * 80)
def __init__(self, embed_size, vocab):
    """ Create the character embedding, CNN, highway and dropout layers.

    @param embed_size (int): Embedding size (dimensionality) for the output. That is, e_word
    @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
    """
    super(ModelEmbeddings, self).__init__()

    # Plain configuration values.
    self.embed_size = embed_size
    self.word_dimension = embed_size
    self.char_dimension = 50
    self.max_word_length = 21  # copied over from utils
    self.dropout_prob = 0.3

    # One embedding row per character id (effectively the char vocab size).
    pad_token_idx = vocab.char2id['<pad>']
    self.embeddings = nn.Embedding(len(vocab.id2char),
                                   self.char_dimension,
                                   padding_idx=pad_token_idx)
    self.cnn_layer = CNN(char_dimension=self.char_dimension,
                         max_word_length=self.max_word_length,
                         out_channels=self.word_dimension)
    self.highway_layer = Highway(self.word_dimension)
    self.dropout_layer = nn.Dropout(self.dropout_prob)
def __init__(self, embed_size, vocab):
    """ Create the character embedding, dropout, CNN and highway layers.

    @param embed_size (int): Embedding size (dimensionality) for the output
    @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
    """
    super(ModelEmbeddings, self).__init__()

    # Configuration values kept on the instance.
    self.embed_size = embed_size
    self.embedding_word_size = embed_size
    self.embedding_char_size = 50
    self.max_word_length = 21
    self.dropout_rate = 0.3

    pad_token_idx = vocab.char2id['<pad>']
    self.charEmbeddings = nn.Embedding(num_embeddings=len(vocab.char2id),
                                       embedding_dim=self.embedding_char_size,
                                       padding_idx=pad_token_idx)
    self.dropout = nn.Dropout(self.dropout_rate)

    # Character-level CNN followed by the highway layer.
    self.CNN = CNN(self.embedding_char_size,
                   self.embedding_word_size,
                   self.max_word_length)
    self.highway = Highway(self.embedding_word_size)
def __init__(self, embed_size, vocab):
    """ Create the embedding, CNN, highway and dropout modules for one language.

    @param embed_size (int): Embedding size (dimensionality) for the output
    @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
    """
    super(ModelEmbeddings, self).__init__()

    # `embed_size` is the name the test calls rely on; `embed_word_size` is
    # this module's own alias for the same value.
    self.embed_size = embed_size
    self.embed_word_size = embed_size
    self.vocab = vocab  # vocab for a particular language
    self.dropout_prob = 0.3
    self.embed_char_size = 50
    self.max_vocab_tokens_in_word = 21

    pad_idx = vocab.char2id['<pad>']
    self.embed_layer = nn.Embedding(len(self.vocab.char2id),
                                    self.embed_char_size,
                                    padding_idx=pad_idx)
    self.cnn_module = CNN(num_input_channels=self.embed_char_size,
                          num_out_channels=self.embed_word_size,
                          input_embed_size=self.max_vocab_tokens_in_word)
    self.highway_module = Highway(word_embedding_size=self.embed_word_size)
    self.dropout = nn.Dropout(self.dropout_prob)
def __init__(self, word_embed_size, vocab):
    """ Create the character embedding, CNN, highway and dropout layers.

    @param word_embed_size (int): Embedding size (dimensionality) for the output word
    @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
    """
    super(ModelEmbeddings, self).__init__()

    self.word_embed_size = word_embed_size
    self.vocab = vocab
    self.e_char = 50
    self.dropout_prob = 0.3
    # 21 appears to be an arbitrary value chosen for the sanity tests and
    # also the greedy decoder.
    self.max_word_len = 21

    pad_idx = vocab.char2id['∏']
    self.embedding = nn.Embedding(num_embeddings=len(vocab.char2id),
                                  embedding_dim=self.e_char,
                                  padding_idx=pad_idx)
    self.cnn = CNN(e_char=self.e_char,
                   filters=self.word_embed_size,
                   kernel_size=5,
                   m_word=self.max_word_len)
    self.highway = Highway(word_embed_size)
    self.dropout = nn.Dropout(self.dropout_prob)
def __init__(self, embed_size, vocab):
    """ Create the character embedding, dropout, CNN and highway layers.

    @param embed_size (int): Embedding size (dimensionality) for the output
    @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
    """
    super(ModelEmbeddings, self).__init__()

    pad_token_idx = vocab.char2id['<pad>']

    # Sizes and rates kept on the instance.
    self.embed_size = embed_size
    self.word_embed_size = embed_size
    self.char_embed_size = 50
    self.max_word_size = 21
    self.dropout_rate = 0.3
    self.v_char = len(vocab.char2id)  # number of character ids
    self.v_word = len(vocab.word2id)  # number of word ids

    self.embeddings = nn.Embedding(num_embeddings=self.v_char,
                                   embedding_dim=self.char_embed_size,
                                   padding_idx=pad_token_idx)
    self.Dropout = nn.Dropout(self.dropout_rate)
    self.cnn = CNN(e_char=self.char_embed_size,
                   e_word=self.word_embed_size,
                   m_word=self.max_word_size)
    self.highway = Highway(embedding_size=self.word_embed_size)
def highway_sanity_check():
    """ Shape check: a one-element input must keep its size through Highway. """
    print("Highway Input[1]")

    embed_size = 1
    cnn_out = torch.ones(1)

    layer = Highway(embed_size)
    result = layer.forward(cnn_out)
    print("Highway Output {}".format(str(result)))

    assert cnn_out.size() == result.size()
    print("-----Shape Test Passed -----")
def test_highway():
    """ Compare the Highway output for a tiny fixed input against stored gold values.

    The input holds 2 words for each of 2 sentences with e_word = 3. It is
    flattened to (sent_len * batch_size, e_word) for the forward pass and
    folded back to (sent_len, batch_size, e_word) before the comparison.

    NOTE(review): the gold values presumably assume specific Highway weights
    (seeded or fixed elsewhere) -- confirm; nothing in this function sets them.
    """
    print()
    print("==="*30)
    print("\nHighway Class test")

    e_word = 3
    x_conv_out = torch.tensor(
        [
            [
                [0, 1, 1],   # sentence a's word 1
                [-1, 1, 0]   # sentence b's word 1
            ],
            [
                [1, 0, 0],   # sentence a's word 2
                [0, 1, 0]    # sentence b's word 2
            ]
        ],
        dtype=torch.float,
        device=device
    )

    # Axis 0 is the word position, axis 1 the sentence. The batch size was
    # previously read from axis 0 as well, which only worked because both
    # axes happen to have length 2.
    sent_len, batch_size = x_conv_out.shape[0], x_conv_out.shape[1]

    correct_x_highway = np.array(
        [
            [
                [0., 0.38797045, 0.57840323],            # sentence a's word 1
                [-0.03674287, 0.4926422, 0.22739217]     # sentence b's word 1
            ],
            [
                [0.58957815, 0., 0.],                    # sentence a's word 2
                [0.24245806, 0.47267026, 0.18764845]     # sentence b's word 2
            ]
        ]
    )

    model = Highway(e_word).to(device)
    obtained_x_highway = model.forward(torch.flatten(x_conv_out, 0, 1))
    # Undo the flatten: rows are ordered word-major, so a plain reshape
    # restores (sent_len, batch_size, e_word).
    obtained_x_highway = obtained_x_highway.reshape(sent_len, batch_size, -1)
    obtained_x_highway = obtained_x_highway.cpu().detach().numpy()

    assert np.allclose(correct_x_highway, obtained_x_highway), \
        "\n\nIncorrect x_highway\n\nCorrect x_highway:\n{}\n\nYour x_highway:\n{}". \
        format(correct_x_highway, obtained_x_highway)
    print("\nx_highway =\n", obtained_x_highway)

    print("\n\nHighway Test Passed!\n")
    print("==="*30)
def question_1f_sanity_check():
    """ Check Highway's output shape and its closed-gate / open-gate limits on an all-ones input. """
    sentence_length, batch_size, embedding_size = 10, 64, 128
    expected_shape = (sentence_length, batch_size, embedding_size)
    x = torch.ones(expected_shape)

    # 1) The layer keeps the input shape.
    layer = Highway(embedding_size)
    shape_out = layer.forward(x)
    assert expected_shape == shape_out.shape

    # 2) Closed gate: the output must equal the input.
    layer = Highway(embedding_size)
    nn.init.constant_(layer.gate_layer.weight, float("-inf"))
    layer.gate_layer.bias.data.fill_(0)
    closed_out = layer.forward(x)
    assert torch.all(torch.eq(closed_out, x))

    # 3) Open gate with an identity projection: the output must equal the input.
    layer = Highway(embedding_size)
    nn.init.constant_(layer.gate_layer.weight, float("inf"))
    layer.gate_layer.bias.data.fill_(0)
    nn.init.eye_(layer.projection_layer.weight)
    layer.projection_layer.bias.data.fill_(0)
    open_out = layer.forward(x)
    assert torch.all(torch.eq(open_out, x))
def question_1h_sanity_check():
    """ Sanity check for the Highway layer: the output shape must match the input shape. """
    print("-" * 80)
    # The banner previously said "Model Embedding" although only Highway is
    # exercised here; it now matches the success message below.
    print("Running Sanity Check for Question 1h: Highway")
    print("-" * 80)

    model = Highway(3)
    x = torch.tensor([[1, 1, 1],
                      [1, 1, 1],
                      [1, 1, 1],
                      [1, 1, 1]], dtype=torch.float)
    print(x.shape)

    res = model.forward(x)
    print(res)
    assert res.shape == x.shape, \
        "output shape is incorrect: it should be:\n {} but is:\n{}".format(x.shape, res.shape)

    print("Sanity Check Passed for Question 1h: Highway!")
    print("-" * 80)
def test_shape(self):
    """ Highway must return a tensor with the same (batch, embed) shape it was given. """
    banner = "-" * 80
    print(banner)
    print("Running Sanity Check for Question 1d: Highway Shape")
    print(banner)

    batch_size = 64
    word_embed_size = 40

    # The layer is built before the input is sampled, as before, so the order
    # of random draws is unchanged.
    highway = Highway(word_embed_size)
    x_conv_out = torch.randn([batch_size, word_embed_size])
    x_word_emb = highway.forward(x_conv_out)

    expected_shape = (batch_size, word_embed_size)
    self.assertEqual(x_word_emb.shape, expected_shape)
    self.assertEqual(x_word_emb.shape, x_conv_out.shape)

    print("Sanity Check Passed for Question 1d: Highway Shape!")
    print(banner)
def question_1h_sanity_check():
    """ Sanity check for Highway network: the output shape must equal the input shape. """
    banner = "-" * 80
    print(banner)
    print("Running Sanity Check for Question 1h: Highway network")
    print(banner)

    net = Highway(5, 5)
    x = torch.randn((10, 6, 5))
    normal_output = net.forward(x)
    assert x.shape == normal_output.shape

    print(banner)
    print("Sanity Check Passed for Question 1h: Highway network")
def question_1g_sanity_check():
    """ Sanity check for highway module: the output must have shape [BATCH_SIZE, EMBED_SIZE]. """
    print ("-"*80)
    print("Running Sanity Check for Question 1g: highway")
    print ("-"*80)

    highway = Highway(EMBED_SIZE)
    conv_out = torch.rand(BATCH_SIZE, EMBED_SIZE)
    # Keep the module and its output in separate names; the output used to
    # overwrite the module variable.
    highway_out = highway.forward(conv_out)

    expected_size = [BATCH_SIZE, EMBED_SIZE]
    assert list(highway_out.size()) == expected_size, \
        "output shape is incorrect: it should be:\n {} but is:\n{}".format(
            expected_size, list(highway_out.size()))

    # The success message previously named "Question 1f: Padding".
    print("Sanity Check Passed for Question 1g: highway!")
    print("-"*80)
def question_1f_sanity_check():
    """ Sanity check for highway module: the output shape must be (BATCH_SIZE, EMBED_SIZE). """
    banner = "-" * 80
    print(banner)
    print("Running Sanity Check for Question 1f: Highway")
    print(banner)

    highway = Highway(EMBED_SIZE)
    x_convout = torch.randn(BATCH_SIZE, EMBED_SIZE)
    ret = highway.forward(x_convout)

    output_expected_size = (BATCH_SIZE, EMBED_SIZE)
    assert ret.shape == output_expected_size

    print("Sanity Check Passed for Question 1f: Highway!")
    print(banner)
def question_1f_sanity_check():
    """ Sanity check for Highway() class.

    Runs two checks:
      1. Shape: a random (5, 100) batch must come back as (5, 100).
      2. Values: with every nn.Linear inside the module set to the same fixed
         weights and a bias of 0.1, one hand-worked input must give the
         expected output within an absolute tolerance of 0.1.
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1f: Highway Class")

    # --- Shape Dimension Check for Highway Class ---
    inputs = torch.randint(100, size=(5, 100), dtype=torch.float)
    highway = Highway(100, 0.3)
    out = highway(inputs)
    expected_out_shape = (5, 100)
    assert (torch.Size(expected_out_shape) == out.shape
            ), "The shape of Highway output is incorrect"

    # --- Matrix Mult for Highway Class ---
    def reinitialize_layers(model):
        """ Reinitialize the Layer Weights for Sanity Checks. """

        def init_weights(m):
            if isinstance(m, nn.Linear):
                m.weight.data = torch.tensor([[.1, .1, .1, .1, .1],
                                              [.03, .03, .03, .03, .03],
                                              [.5, .5, .5, .5, .5],
                                              [-.7, -.7, -.7, -.7, -.7],
                                              [-.9, -.9, -.9, -.9, -.9]])
                if m.bias is not None:
                    m.bias.data.fill_(0.1)
            elif isinstance(m, nn.Embedding):
                m.weight.data.fill_(0.15)
            # Dropout layers hold no weights, so there is nothing to reset.
            # The previous branch built a throwaway nn.Dropout and discarded it.

        with torch.no_grad():
            model.apply(init_weights)

    # NOTE(review): the second argument is presumably the dropout rate (0 here
    # so the numbers below are deterministic) -- confirm against Highway.
    highway = Highway(5, 0)
    reinitialize_layers(highway)

    inputs = torch.tensor([[3, 5, 7, 11, 13]], dtype=torch.float)  # Sums to 39
    # Worked example, assuming the layer computes
    # sigmoid(Wx + b) * relu(Wx + b) + (1 - sigmoid(Wx + b)) * x :
    #   W x + b         = [4.0, 1.27, 19.6, -27.2, -35.0]
    #   relu(W x + b)   = [4.0, 1.27, 19.6, 0, 0]
    #   sigmoid(W x + b) ~ [0.98, 0.78, 1, 0, 0]
    #   combined        ~ [3.98, 2.09, 19.6, 11, 13]
    ans = highway(inputs)
    expected = np.array([3.98, 2.09, 19.6, 11, 13])
    assert (np.allclose(
        expected, ans.detach().numpy(),
        atol=0.1)), "highway function not giving expected output"

    print("Sanity Check Passed for Question 1f: Highway Class")
    print("-" * 80)
def __init__(self, embed_size, vocab):
    """ Create the character embedding, CNN and highway layers for one language.

    @param embed_size (int): Embedding size (dimensionality) for the output
    @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
    """
    super(ModelEmbeddings, self).__init__()

    self.embed_size = embed_size

    # Character lookup table; note that it uses embed_size as its width too.
    pad_token_idx = vocab.char2id['<pad>']
    self.embeddings = nn.Embedding(num_embeddings=len(vocab.char2id),
                                   embedding_dim=embed_size,
                                   padding_idx=pad_token_idx)

    # 1D convolutional network over the characters of each word.
    self.cnn = CNN(embed_size, embed_size, kernel_size=5)

    # Highway network; it is handed the dropout rate directly.
    self.highway = Highway(embed_size, dropout_rate=0.3)
def question_1h_test():
    """ Custom simple test for Highway module: a random 4-D tensor must keep its shape.

    (TODO: More detailed sanity checks rather than just checking the shape)
    """
    from highway import Highway

    banner = "-" * 80
    print(banner)
    print("Running Sanity Check for Question 1h: Highway")
    print(banner)

    print("Running test on a random tensor")
    sentence_length = 6
    max_word_length = 21
    # (max sentence length, batch size, max word length, embedding dimension)
    tensor_shape = [sentence_length, BATCH_SIZE, max_word_length, EMBED_SIZE]
    gold_shape = torch.Size(tensor_shape)

    highway = Highway(EMBED_SIZE, dropout_rate=0)
    test_tensor = torch.randn(tensor_shape)
    output_tensor = highway(test_tensor)

    failure_message = "Ouput tensor shape is incorrect: it should be:\n {} but is:\n{}".format(
        gold_shape, output_tensor.shape)
    assert output_tensor.shape == gold_shape, failure_message

    print("Sanity Check Passed for Question 1h: Highway!")
    print(banner)
def question_1h_sanity_check():
    """ Sanity check for highway.py: an all-zero batch must keep shape [BATCH_SIZE, EMBED_SIZE]. """
    banner = "-" * 80
    print(banner)
    print("Running Sanity Check for Question 1h: Highway")
    print(banner)

    inpt = torch.zeros(BATCH_SIZE, EMBED_SIZE)
    highway_net = Highway(EMBED_SIZE)
    output = highway_net.forward(inpt)

    output_expected_size = [BATCH_SIZE, EMBED_SIZE]
    actual_size = list(output.size())
    assert actual_size == output_expected_size, \
        "output shape is incorrect: it should be:\n {} but is:\n{}".format(
            output_expected_size, list(output.size()))

    print("Sanity Check Passed for Question 1h: Highway!")
    print(banner)
def __init__(self, embed_size, vocab):
    """ Create the character embedding table and the CNN -> Highway -> Dropout chain.

    @param embed_size (int): Embedding size (dimensionality) for the output
    @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
    """
    super(ModelEmbeddings, self).__init__()

    self.dropout_rate = 0.3
    self.emb_char = 50
    self.embed_size = embed_size

    pad_token_idx = vocab.char2id['<pad>']
    self.embeddings = nn.Embedding(num_embeddings=len(vocab.char2id),
                                   embedding_dim=self.emb_char,
                                   padding_idx=pad_token_idx)

    # Stages of the pipeline, built in the order they are applied.
    # The dropout rate is passed to Highway *and* a separate Dropout layer is
    # appended; the original author was unsure whether both are needed.
    stages = [
        CNN(emb_char=self.emb_char, emb_word=self.embed_size),  # Conv1d + Pooling1d
        Highway(embed_size=self.embed_size, dropout_rate=self.dropout_rate),
        nn.Dropout(self.dropout_rate),
    ]

    # Chain all together.
    self.construct_emb = nn.Sequential(*stages)
def test_gate_bypass(self):
    """ With the gate forced shut (zero weights, -inf bias) Highway must return its input. """
    banner = "-" * 80
    print(banner)
    print("Running Sanity Check for Question 1d: Highway Bypass")
    print(banner)

    batch_size = 64
    word_embed_size = 40
    highway = Highway(word_embed_size)

    # Zero weights plus a -inf bias drive the gate pre-activation to -inf.
    highway.gate.weight.data.zero_()
    highway.gate.bias.data.fill_(float('-inf'))

    x_conv_out = torch.randn([batch_size, word_embed_size])
    x_word_emb = highway.forward(x_conv_out)
    self.assertTrue(torch.allclose(x_conv_out, x_word_emb))

    print("Sanity Check Passed for Question 1d: Highway Bypass!")
    print(banner)
def __init__(self, word_embed_size, vocab):
    """ Create the character embedding, CNN, highway and dropout layers.

    @param word_embed_size (int): Embedding size (dimensionality) for the output word
    @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
    """
    super(ModelEmbeddings, self).__init__()

    self.embed_size = word_embed_size
    self.vocab = vocab
    self.char_embed_size = 50
    self.max_word_len = 21
    # NOTE(review): the other ModelEmbeddings initialisers in this file use a
    # dropout rate of 0.3 -- confirm that 0.2 is intended here.
    self.dropout_rate = 0.2

    pad_idx = vocab.char2id['<pad>']
    self.char_embedding = nn.Embedding(len(vocab.char2id),
                                       self.char_embed_size,
                                       padding_idx=pad_idx)
    self.CNN = CNN(embed_size=self.char_embed_size,
                   out_channels=self.embed_size,
                   word_len=self.max_word_len)
    self.Highway = Highway(self.embed_size)
    self.dropout = nn.Dropout(p=self.dropout_rate)
def __init__(self, embed_size, kenerl_size=5, vocab=None, embed_char=50):
    """ Init the character embedding, CNN and highway layers for one language.

    The previous body read `vocab`, `pad_token_idx` and `self.embed_char`
    without any of them being defined, so construction always failed. `vocab`
    (already documented here but missing from the signature) and `embed_char`
    are now keyword parameters, and the pad index is looked up in the vocab.

    @param embed_size (int): Embedding size (dimensionality) for the output;
        also passed to CNN as the number of filters
    @param kenerl_size (int): Kernel size passed to CNN
    @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
        Required; must expose `char2id` containing a '<pad>' entry.
    @param embed_char (int): Width of each character embedding. The default of
        50 matches the other embedding initialisers in this file.
    @raises ValueError: if `vocab` is not supplied
    """
    super(CNNClassifier, self).__init__()
    if vocab is None:
        raise ValueError(
            "CNNClassifier requires a `vocab` with a `char2id` mapping")

    self.embed_size = embed_size
    self.kenerl_size = kenerl_size
    self.embed_char = embed_char

    pad_token_idx = vocab.char2id['<pad>']
    self.embeddings = nn.Embedding(len(vocab.char2id),
                                   self.embed_char,
                                   padding_idx=pad_token_idx)
    # self.embed_size is the number of filters
    self.cnn = CNN(self.embed_char, self.kenerl_size, self.embed_size)
    self.highway = Highway(self.embed_size, dropout_rate=0.3)
class CNNClassifier(nn.Module):
    """ Class that converts input words to their CNN-based embeddings. """

    def __init__(self, embed_size, kenerl_size=5, vocab=None, embed_char=50):
        """ Init the character embedding, CNN and highway layers for one language.

        The previous body read `vocab`, `pad_token_idx` and `self.embed_char`
        without any of them being defined, so construction always failed.
        `vocab` (already documented here but missing from the signature) and
        `embed_char` are now keyword parameters, and the pad index is looked
        up in the vocab.

        @param embed_size (int): Embedding size (dimensionality) for the output;
            also passed to CNN as the number of filters
        @param kenerl_size (int): Kernel size passed to CNN
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
            Required; must expose `char2id` containing a '<pad>' entry.
        @param embed_char (int): Width of each character embedding (default 50)
        @raises ValueError: if `vocab` is not supplied
        """
        super(CNNClassifier, self).__init__()
        if vocab is None:
            raise ValueError(
                "CNNClassifier requires a `vocab` with a `char2id` mapping")

        self.embed_size = embed_size
        self.kenerl_size = kenerl_size
        self.embed_char = embed_char

        pad_token_idx = vocab.char2id['<pad>']
        self.embeddings = nn.Embedding(len(vocab.char2id),
                                       self.embed_char,
                                       padding_idx=pad_token_idx)
        # self.embed_size is the number of filters
        self.cnn = CNN(self.embed_char, self.kenerl_size, self.embed_size)
        self.highway = Highway(self.embed_size, dropout_rate=0.3)

    def forward(self, input):
        """ Looks up character-based CNN embeddings for the words in a batch of sentences.

        @param input: Tensor of integers of shape (sentence_length, batch_size, max_word_length)
            where each integer is an index into the character vocabulary
        @returns: Tensor of shape (sentence_length, batch_size, embed_size), containing the
            CNN-based embeddings for each word of the sentences in the batch
        """
        # (sentence_length, batch_size, max_word_length, embed_char)
        char_embeds = self.embeddings(input)
        sentence_length, batch_size, max_word_length, _ = char_embeds.shape

        # Move the character-embedding axis in front of the word-length axis,
        # then fold sentence and batch axes together so that every word is one
        # item: (sentence_length * batch_size, embed_char, max_word_length).
        x_reshape = char_embeds.permute(0, 1, 3, 2).reshape(
            -1, self.embed_char, max_word_length)

        # Modules are called directly (not via .forward) so registered hooks run.
        x_cnn = self.cnn(x_reshape)

        # Unfold back to (sentence_length, batch_size, embed_size) for the highway layer.
        x_highway = self.highway(
            x_cnn.reshape(sentence_length, batch_size, self.embed_size))
        return x_highway