def question_1g_sanity_check():
    """Verify the shape of the tensor built by to_input_tensor_char().

    A fixed batch of four sentences is converted on the CPU and the result
    must have shape (6, 4, 21): 6 is the max sentence length, 4 is the
    batch size and 21 is the max word length.

    Raises:
        AssertionError: if the returned tensor has a different shape.
    """
    divider = "-" * 80
    print(divider)
    print("Running Sanity Check for Question 1g: to_input_tensor_char")
    print(divider)

    char_vocab = VocabEntry()

    print("Running test on a list of sentences")
    batch = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    char_tensor = char_vocab.to_input_tensor_char(batch, "cpu")

    # (max_sentence_length, batch_size, max_word_length)
    assert char_tensor.shape == (6, 4, 21), \
        "Size is incorrect: it should be (6, 4, 21) but it is {}".format(
            char_tensor.shape)

    print("Sanity Check Passed for Question 1g: to_input_tensor_char!")
    print(divider)
def question_1c_sanity_check():
    """Check character padding against the gold file, then the tensor shape.

    First the output of pad_sents_char() is compared with the stored gold
    padding. The expected tensor dimensions are then read off the gold data
    and compared with the size of the to_input_tensor_char() result.

    Raises:
        AssertionError: if the padded sentences differ from the gold data or
            the tensor size does not match.
    """
    rule = "-" * 80
    print(rule)
    print("Running Sanity Check for Question 1c: To input tensor")
    print(rule)

    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]

    padded = pad_sents_char(vocab.words2charindices(sentences), 0)
    gold = torch.load('./sanity_check_en_es_data/gold_padded_sentences.pkl')
    assert padded == gold, \
        f"Sentence padding is incorrect: it should be:\n {gold} but is:\n{padded}"

    # The gold data is indexed [sentence][word][char]; the tensor is expected
    # with the first two axes swapped.
    n_sentences = len(gold)
    n_words = len(gold[0])
    n_chars = len(gold[0][0])
    expected_size = (n_words, n_sentences, n_chars)

    char_tensor = vocab.to_input_tensor_char(
        sentences, device=torch.device('cpu'))
    assert char_tensor.size() == expected_size

    print("Sanity Check Passed for Question 1c: To input tensor")
    print(rule)
def question_1g_sanity_check(model):
    """Sanity check for to_input_tensor_char(): verify the output size.

    Converts a fixed batch of four sentences (the longest has six words) to a
    character tensor on ``model.device`` and checks that its size is
    (6, 4, 21). The trailing 21 is presumably the maximum word length used by
    the vocabulary -- confirm against VocabEntry.

    Args:
        model: any object exposing a ``device`` attribute; the tensor is
            created on that device.

    Raises:
        AssertionError: if the tensor size is not (6, 4, 21).
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1g: Padding")
    print("-" * 80)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    padded_sentences = vocab.to_input_tensor_char(sentences, model.device)

    # Only the size is compared, so spell it out directly instead of
    # allocating a random tensor just to read its size.
    expected_size = torch.Size([6, 4, 21])
    actual_size = padded_sentences.size()
    print(expected_size)
    print(actual_size)
    assert actual_size == expected_size, (
        "to_input_tensor_char size is incorrect: it should be:\n {} but is:\n{}"
        .format(expected_size, actual_size))

    print("Sanity Check Passed for Question 1g: Padding!")
    print("-" * 80)
def question_1e_sanity_check():
    """Check the size of the tensor returned by to_input_tensor_char().

    The batch holds four pre-tokenised sentences, the longest with eight
    tokens, and the tensor is expected to have size [8, 4, 12].

    Raises:
        AssertionError: if the tensor size differs from [8, 4, 12].
    """
    separator = "-" * 80
    print(separator)
    print("Running Sanity Check for Question 1e: To Input Tensor Char")
    print(separator)

    vocab = VocabEntry()

    print("Running test on a list of sentences")
    batch = [
        ["Human", ":", "What", "do", "we", "want", "?"],
        ["Computer", ":", "Natural", "language", "processing", "!"],
        ["Human", ":", "When", "do", "we", "want", "it", "?"],
        ["Computer", ":", "When", "do", "we", "want", "what", "?"],
    ]

    # [sentence_length, batch_size, word_length]
    expected_size = [8, 4, 12]
    actual_size = list(vocab.to_input_tensor_char(batch, "cpu").size())

    assert actual_size == expected_size, \
        f"output shape is incorrect: it should be:\n {expected_size} but is:\n{actual_size}"

    print("Sanity Check Passed for Question 1e: To Input Tensor Char!")
    print(separator)
def question_1g_sanity_check():
    """Check the shape of the character tensor from to_input_tensor_char().

    The expected shape is [longest sentence, number of sentences, 21]; the
    first two entries are derived from the test batch itself. The tensor is
    also printed for inspection.

    Raises:
        AssertionError: if the tensor shape differs from the expected one.
    """
    bar = "-" * 80
    print(bar)
    print("Running Sanity Check for Question 1g: Creating Input Tensor")
    print(bar)

    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    cpu = torch.device("cpu")
    longest_sentence = max(map(len, sentences))
    word_width = 21

    char_tensor = vocab.to_input_tensor_char(sentences, cpu)
    print(char_tensor)

    expected = [longest_sentence, len(sentences), word_width]
    observed = list(char_tensor.size())
    assert observed == expected, \
        f"Input Tensor Creation is incorrect: it should be \n{expected} but is:{char_tensor.size()}"

    print("Sanity Check Passed for Question 1g: Creating Input Tensor!")
    print(bar)
def question_1h_sanity_check(model):
    """Sanity check for the highway network.

    Builds the character tensor for a fixed four-sentence batch on
    ``model.device``, runs a ``Highway`` module forward on a random
    (1, 1, 21) input, and prints the expected (6, 4, 21) size next to the
    actual size of the character tensor. Nothing is asserted; an exception
    from any of the calls is the only failure signal.

    Args:
        model: any object exposing a ``device`` attribute; the character
            tensor is created on that device.
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1h: Padding")
    print("-" * 80)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    padded_sentences = vocab.to_input_tensor_char(sentences, model.device)

    # Test with batch size 1
    x = torch.rand(1, 1, 21)
    hw = Highway(21, 21, 21, 0.5)
    hw.forward(x)

    # Batch of 4 sentences: show the expected size beside the actual one so
    # they can be compared by eye.
    expected_size = torch.Size([6, 4, 21])
    print(expected_size)
    print(padded_sentences.size())

    print("Sanity Check Passed for Question 1h: Padding!")
    print("-" * 80)
def question_1g_test():
    """Simple custom test of to_input_tensor_char().

    Converts a fixed batch of four sentences on the GPU when CUDA is
    available (CPU otherwise) and checks that the tensor shape is
    (6, 4, 21), i.e. (max sentence length, batch size, max word length).

    Raises:
        AssertionError: if the tensor shape differs from (6, 4, 21).
    """
    line = "-" * 80
    print(line)
    print("Running Sanity Check for Question 1g: Padding")
    print(line)

    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    expected_shape = torch.Size([6, 4, 21])

    if torch.cuda.is_available():
        device_name = 'cuda'
    else:
        device_name = 'cpu'
    char_tensor = vocab.to_input_tensor_char(
        sentences, torch.device(device_name))

    assert char_tensor.shape == expected_shape, \
        f"Ouput tensor shape is incorrect: it should be:\n {expected_shape} but is:\n{char_tensor.shape}"

    print("Sanity Check Passed for Question 1g: Padding!")
    print(line)
def question_1e_sanity_check():
    """Sanity check for the output size of to_input_tensor_char().

    Four tokenised sentences (at most eight tokens each) are converted on
    the CPU; the resulting tensor must have size [8, 4, 12].

    Raises:
        AssertionError: if the tensor size differs from [8, 4, 12].
    """
    def banner(text):
        # Frame a headline between two 80-character rules.
        print("-" * 80)
        print(text)
        print("-" * 80)

    banner("Running Sanity Check for Question 1e: To Input Tensor Char")
    vocab_entry = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human', ':', 'What', 'do', 'we', 'want', '?'],
        ['Computer', ':', 'Natural', 'language', 'processing', '!'],
        ['Human', ':', 'When', 'do', 'we', 'want', 'it', '?'],
        ['Computer', ':', 'When', 'do', 'we', 'want', 'what', '?'],
    ]
    expected_dims = [8, 4, 12]  # sentence length, batch size, word length

    result = vocab_entry.to_input_tensor_char(sentences, 'cpu')
    result_dims = list(result.size())
    assert result_dims == expected_dims, (
        "output shape is incorrect: it should be:\n {} but is:\n{}".format(
            expected_dims, result_dims))

    print("Sanity Check Passed for Question 1e: To Input Tensor Char!")
    print("-" * 80)
def test_file1_method1():
    """Check each dimension of the to_input_tensor_char() output.

    For a two-sentence batch whose longest sentence has three words, the
    tensor is expected to have 3 along axis 0, 2 along axis 1 and 21 along
    axis 2.

    Raises:
        AssertionError: if any of the three dimensions is wrong.
    """
    batch = [['ciao', 'come', 'staiii'], ['sto', 'bene']]
    expected_sent_len = 3
    expected_batch = 2
    expected_word_len = 21

    vocab = VocabEntry()
    shape = vocab.to_input_tensor_char(batch, torch.device('cpu')).shape

    assert shape[0] == expected_sent_len
    assert shape[1] == expected_batch
    assert shape[2] == expected_word_len, ''
def question_1g_sanity_check():
    """Smoke test for to_input_tensor_char().

    Calls the method on a fixed batch of four sentences on the CPU. The
    result is not inspected, so the check passes whenever the call returns
    without raising.
    """
    hr = "-" * 80
    print(hr)
    print("Running Sanity Check for Question 1g: Reshape")
    print(hr)

    vocab = VocabEntry()

    print("Running test on a list of sentences")
    batch = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    # Return value intentionally discarded: only "does not raise" is tested.
    vocab.to_input_tensor_char(batch, "cpu")

    print("Sanity Check Passed for Question 1g: Reshape!")
    print(hr)
def question_1c_sanity_check():
    """Sanity check for to_input_tensor_char() function.

    Smoke test: converts a fixed batch of four sentences to a character
    tensor on the CPU. The result is not inspected, so the check passes
    whenever the call returns without raising.
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1c")
    print("-" * 80)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    # Pass a device *instance*; the bare ``torch.device`` class is not a
    # valid device argument.
    vocab.to_input_tensor_char(sentences, torch.device('cpu'))
def question_1g_sanity_check():
    """Run to_input_tensor_char() on a fixed batch and report its shape.

    The tensor is built on CPU device index 0. Nothing is asserted; the
    resulting shape is printed as part of the "passed" message.
    """
    dashes = "-" * 80
    print(dashes)
    print("Running Sanity Check for Question 1g: question_1g_sanity_check")
    print(dashes)

    vocab = VocabEntry()

    print("Running test on a list of sentences")
    batch = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    cpu0 = torch.device('cpu', 0)
    char_tensor = vocab.to_input_tensor_char(batch, cpu0)

    print(f"Sanity Check Passed for Question 1g:shape={char_tensor.shape}")
    print(dashes)
def question_1g_sanity_check():
    """Sanity check for to_input_tensor_char() function.

    Converts a fixed batch of four sentences (the longest has six words) and
    checks that the resulting tensor has shape (6, 4, 21). The tensor is
    built on ``cuda:0`` when CUDA is available and on the CPU otherwise, so
    the check also runs on machines without a GPU.

    Raises:
        AssertionError: if the tensor shape is not (6, 4, 21).
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1g: Padding")
    print("-" * 80)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    # Fall back to the CPU instead of failing when no GPU is present.
    if torch.cuda.is_available():
        device = torch.device('cuda', 0)
    else:
        device = torch.device('cpu')
    input_tensor = vocab.to_input_tensor_char(sentences, device)

    assert input_tensor.shape == (6, 4, 21), (
        "Input tensor shape is incorrect: it should be (6, 4, 21) "
        "but is {}".format(tuple(input_tensor.shape)))

    print("Sanity Check Passed for Question 1g: Padding!")
    print("-" * 80)
def question_1c_sanity_check():
    """Exercise the character pipeline and print the input tensor's shape.

    Runs words2charindices() and pad_sents_char() on a fixed batch (their
    results are not inspected), then prints the shape of the tensor returned
    by to_input_tensor_char() on the CPU. Nothing is asserted.
    """
    rule = "-" * 80
    print(rule)
    print("Running Sanity Check for Question 1c: Input tensor")
    print(rule)

    vocab = VocabEntry()

    print("Running test on a list of sentences")
    batch = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    # Called only to confirm that indexing and padding run without raising.
    pad_sents_char(vocab.words2charindices(batch), 0)

    print(vocab.to_input_tensor_char(batch, "cpu").shape)
def question_1g_sanity_check():
    """Sanity check for to_input_tensor_char() function.

    Loads the gold padded sentences, turns them into a tensor with the first
    two axes swapped, and checks that to_input_tensor_char() returns a tensor
    of the same size for the same batch. Only sizes are compared, not
    contents.

    Raises:
        AssertionError: if the two tensor sizes differ.
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1g: Building the input tensor")
    print("-" * 80)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    device = torch.device('cpu')
    padded_tensor = vocab.to_input_tensor_char(sentences, device)

    gold_padded_sentences = torch.load(
        './sanity_check_en_es_data/gold_padded_sentences.pkl')
    gold_padded_tensor = torch.tensor(
        gold_padded_sentences, device=device).permute(1, 0, 2)

    # Report the sizes being compared, using only names that exist here, so
    # a mismatch surfaces as an AssertionError with a useful message.
    expected_size = gold_padded_tensor.size()
    actual_size = padded_tensor.size()
    assert actual_size == expected_size, (
        "Sentence padding is incorrect: it should be:\n {} but is:\n{}"
        .format(expected_size, actual_size))

    print("Sanity Check Passed for Question 1g: Building the input tensor!")
    print("-" * 80)
def question_1g_sanity_check():
    """Sanity check for to_input_tensor_char() function.

    Builds a reference tensor from pad_sents_char() (with the first two axes
    swapped) and checks that to_input_tensor_char() returns exactly the same
    values for the same batch on the CPU.

    Raises:
        AssertionError: if the two tensors differ in size or content.
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1g")
    print("-" * 80)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    word_ids = vocab.words2charindices(sentences)
    padded_sentences = pad_sents_char(word_ids, 0)

    actual = vocab.to_input_tensor_char(sentences, device="cpu")

    # Keep the reference as integer indices and compare in the dtype of the
    # tensor under test, rather than going through a float32 torch.Tensor
    # and relying on how equal() treats mismatched dtypes.
    expected = torch.tensor(padded_sentences).permute(1, 0, 2)
    expected = expected.to(actual.dtype)

    assert expected.equal(actual), (
        "Wrong! expected size {} but got {}, or the contents differ".format(
            tuple(expected.size()), tuple(actual.size())))

    print("Sanity Check Passed for Question 1g")
    print("-" * 80)
def test1c():
    """Print the tensor produced by to_input_tensor_char().

    ``sentences`` is not defined in this function and is looked up from the
    enclosing scope -- presumably a module-level batch; verify it exists.
    ``None`` is passed as the device.
    """
    print(VocabEntry().to_input_tensor_char(sentences, None))