Example #1
    def setUp(self):
        # Seed the Random Number Generators
        seed = 1234
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        np.random.seed(seed * 13 // 7)

        # Load training data & vocabulary
        train_data_src = submission.read_corpus(
            './sanity_check_en_es_data/train_sanity_check.es', 'src')
        train_data_tgt = submission.read_corpus(
            './sanity_check_en_es_data/train_sanity_check.en', 'tgt')
        train_data = list(zip(train_data_src, train_data_tgt))

        for src_sents, tgt_sents in submission.batch_iter(
                train_data, batch_size=BATCH_SIZE, shuffle=True):
            self.src_sents = src_sents
            self.tgt_sents = tgt_sents
            break
        self.vocab = Vocab.load(
            './sanity_check_en_es_data/vocab_sanity_check.json')

        # Create NMT Model
        self.model = submission.NMT(embed_size=EMBED_SIZE,
                                    hidden_size=HIDDEN_SIZE,
                                    dropout_rate=DROPOUT_RATE,
                                    vocab=self.vocab)
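This setUp relies on module-level imports and hyperparameter constants that are not part of the snippet. A minimal, assumed preamble is sketched below; the concrete values and import paths are placeholders, not the grader's actual settings.

import numpy as np
import torch

import submission          # student code: read_corpus, batch_iter, NMT
from vocab import Vocab    # assumed location of the Vocab class

# Placeholder hyperparameters; the real harness defines its own values.
BATCH_SIZE = 5
EMBED_SIZE = 10
HIDDEN_SIZE = 10
DROPOUT_RATE = 0.0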
Example #2
    def test_0(self):
        """1d-0-basic:  Sanity check for Encode.  Compares student output to that of model with dummy data."""
        # Seed the Random Number Generators
        seed = 1234
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        np.random.seed(seed * 13 // 7)

        # Load training data & vocabulary
        train_data_src = submission.read_corpus(
            './sanity_check_en_es_data/train_sanity_check.es', 'src')
        train_data_tgt = submission.read_corpus(
            './sanity_check_en_es_data/train_sanity_check.en', 'tgt')
        train_data = list(zip(train_data_src, train_data_tgt))

        for src_sents, tgt_sents in submission.batch_iter(
                train_data, batch_size=BATCH_SIZE, shuffle=True):
            # keep only the first batch
            break
        vocab = Vocab.load('./sanity_check_en_es_data/vocab_sanity_check.json')

        # Create NMT Model
        model = submission.NMT(embed_size=EMBED_SIZE,
                               hidden_size=HIDDEN_SIZE,
                               dropout_rate=DROPOUT_RATE,
                               vocab=vocab)
        # Configure for Testing
        reinitialize_layers(model)
        source_lengths = [len(s) for s in src_sents]
        source_padded = model.vocab.src.to_input_tensor(src_sents,
                                                        device=model.device)

        # Load Outputs
        enc_hiddens_target = torch.load(
            './sanity_check_en_es_data/enc_hiddens.pkl')
        dec_init_state_target = torch.load(
            './sanity_check_en_es_data/dec_init_state.pkl')

        # Test
        with torch.no_grad():
            enc_hiddens_pred, dec_init_state_pred = model.encode(
                source_padded, source_lengths)
        self.assertTrue(
            np.allclose(enc_hiddens_target.numpy(), enc_hiddens_pred.numpy()),
            "enc_hiddens is incorrect: it should be:\n {} but is:\n{}".format(
                enc_hiddens_target, enc_hiddens_pred))
        print("enc_hiddens Sanity Checks Passed!")
        self.assertTrue(
            np.allclose(dec_init_state_target[0].numpy(),
                        dec_init_state_pred[0].numpy()),
            "dec_init_state[0] is incorrect: it should be:\n {} but is:\n{}".format(
                dec_init_state_target[0], dec_init_state_pred[0]))
        print("dec_init_state[0] Sanity Checks Passed!")
        self.assertTrue(
            np.allclose(dec_init_state_target[1].numpy(),
                        dec_init_state_pred[1].numpy()),
            "dec_init_state[1] is incorrect: it should be:\n {} but is:\n{}".format(
                dec_init_state_target[1], dec_init_state_pred[1]))
        print("dec_init_state[1] Sanity Checks Passed!")
Example #3
    def test_1(self):
        """1e-1-hidden: Combined Outputs Check"""
        # Set Seeds
        random.seed(35436)
        np.random.seed(4355)
        torch.manual_seed(42)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(42)

        # Create Inputs
        input = setup()
        self.vocab = input[-1]

        # Initialize student model
        self.model = submission.NMT(embed_size=LARGE_EMBED_SIZE,
                                    hidden_size=LARGE_HIDDEN_SIZE,
                                    dropout_rate=NONZERO_DROPOUT_RATE,
                                    vocab=self.vocab)

        # Initialize soln model
        random.seed(35436)
        np.random.seed(4355)
        torch.manual_seed(42)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(42)
        self.soln_model = self.run_with_solution_if_possible(
            submission, lambda sub_or_sol: sub_or_sol).NMT(
                embed_size=LARGE_EMBED_SIZE,
                hidden_size=LARGE_HIDDEN_SIZE,
                dropout_rate=NONZERO_DROPOUT_RATE,
                vocab=self.vocab)
        # To prevent dropout
        self.model.train(False)
        self.soln_model.train(False)

        self.source_lengths = [len(s) for s in input[0]]
        self.source_padded = self.soln_model.vocab.src.to_input_tensor(
            input[0], device=self.soln_model.device)
        self.target_padded = self.soln_model.vocab.tgt.to_input_tensor(
            input[1], device=self.soln_model.device)  # Tensor: (tgt_len, b)

        self.target = input[1]
        self.combined_outputs = test_combined_outputs(
            self.source_padded, self.source_lengths, self.target_padded,
            self.model, self.soln_model, self.vocab)
        self.assertTrue(self.combined_outputs)
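setup() is another helper defined outside the snippet. The tests index its result as input[0] (source sentences), input[1] (target sentences) and input[-1] (vocabulary), so a minimal assumed version could look like this; the file paths are copied from the other examples and may differ in the real harness.

def setup():
    """Assumed helper: return (src_sents, tgt_sents, vocab) for the hidden tests."""
    src_sents = submission.read_corpus(
        './sanity_check_en_es_data/train_sanity_check.es', 'src')
    tgt_sents = submission.read_corpus(
        './sanity_check_en_es_data/train_sanity_check.en', 'tgt')
    vocab = Vocab.load('./sanity_check_en_es_data/vocab_sanity_check.json')
    return src_sents, tgt_sents, vocab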
Example #4
    def setUp(self):
        # Set Seeds
        random.seed(35436)
        np.random.seed(4355)
        torch.manual_seed(42)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(42)

        # Create Inputs
        input = setup()
        self.vocab = input[-1]

        # Initialize student model
        self.model = submission.NMT(embed_size=LARGE_EMBED_SIZE,
                                    hidden_size=LARGE_HIDDEN_SIZE,
                                    dropout_rate=NONZERO_DROPOUT_RATE,
                                    vocab=self.vocab)

        # Initialize soln model
        random.seed(35436)
        np.random.seed(4355)
        torch.manual_seed(42)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(42)
        self.soln_model = self.run_with_solution_if_possible(
            submission, lambda sub_or_sol: sub_or_sol).NMT(
                embed_size=LARGE_EMBED_SIZE,
                hidden_size=LARGE_HIDDEN_SIZE,
                dropout_rate=NONZERO_DROPOUT_RATE,
                vocab=self.vocab)

        self.source_lengths = [len(s) for s in input[0]]
        self.source_padded = self.soln_model.vocab.src.to_input_tensor(
            input[0], device=self.soln_model.device)
        self.enc_hidden, self.decode_hidden, self.decode_cell = test_encoding_hiddens(
            self.source_padded, self.source_lengths, self.model,
            self.soln_model, self.vocab)
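test_encoding_hiddens returns three flags that the class stores as enc_hidden, decode_hidden and decode_cell. A rough, assumed sketch of such a comparison, under the assumptions that encode() returns the encoder hiddens plus a (hidden, cell) decoder init state and that parameter names match between the two models:

def test_encoding_hiddens(source_padded, source_lengths, model, soln_model, vocab):
    """Assumed sketch: compare student vs. solution encoder outputs."""
    model.load_state_dict(soln_model.state_dict())  # assumes matching parameter names
    with torch.no_grad():
        enc_s, (h_s, c_s) = model.encode(source_padded, source_lengths)
        enc_t, (h_t, c_t) = soln_model.encode(source_padded, source_lengths)
    return (torch.allclose(enc_s, enc_t, atol=1e-5),
            torch.allclose(h_s, h_t, atol=1e-5),
            torch.allclose(c_s, c_t, atol=1e-5))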
Example #5
    def test_0(self):
        """1f-0-basic:  Sanity check for Step.  Compares student output to that of model with dummy data."""
        # Seed the Random Number Generators
        seed = 1234
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        np.random.seed(seed * 13 // 7)

        # Load training data & vocabulary
        train_data_src = submission.read_corpus(
            './sanity_check_en_es_data/train_sanity_check.es', 'src')
        train_data_tgt = submission.read_corpus(
            './sanity_check_en_es_data/train_sanity_check.en', 'tgt')
        train_data = list(zip(train_data_src, train_data_tgt))

        for src_sents, tgt_sents in submission.batch_iter(
                train_data, batch_size=BATCH_SIZE, shuffle=True):
            self.src_sents = src_sents
            self.tgt_sents = tgt_sents
            break
        self.vocab = Vocab.load(
            './sanity_check_en_es_data/vocab_sanity_check.json')

        # Create NMT Model
        self.model = submission.NMT(embed_size=EMBED_SIZE,
                                    hidden_size=HIDDEN_SIZE,
                                    dropout_rate=DROPOUT_RATE,
                                    vocab=self.vocab)

        reinitialize_layers(self.model)
        # Inputs
        Ybar_t = torch.load('./sanity_check_en_es_data/Ybar_t.pkl')
        dec_init_state = torch.load(
            './sanity_check_en_es_data/dec_init_state.pkl')
        enc_hiddens = torch.load('./sanity_check_en_es_data/enc_hiddens.pkl')
        enc_masks = torch.load('./sanity_check_en_es_data/enc_masks.pkl')
        enc_hiddens_proj = torch.load(
            './sanity_check_en_es_data/enc_hiddens_proj.pkl')

        # Output
        dec_state_target = torch.load(
            './sanity_check_en_es_data/dec_state.pkl')
        o_t_target = torch.load('./sanity_check_en_es_data/o_t.pkl')
        e_t_target = torch.load('./sanity_check_en_es_data/e_t.pkl')

        # Run Tests
        with torch.no_grad():
            dec_state_pred, o_t_pred, e_t_pred = self.model.step(
                Ybar_t, dec_init_state, enc_hiddens, enc_hiddens_proj,
                enc_masks)
        self.assertTrue(
            np.allclose(dec_state_target[0].numpy(),
                        dec_state_pred[0].numpy()),
            "decoder_state[0] should be:\n {} but is:\n{}".format(
                dec_state_target[0], dec_state_pred[0]))
        print("dec_state[0] Sanity Checks Passed!")
        self.assertTrue(
            np.allclose(dec_state_target[1].numpy(),
                        dec_state_pred[1].numpy()),
            "decoder_state[1] should be:\n {} but is:\n{}".format(
                dec_state_target[1], dec_state_pred[1]))
        print("dec_state[1] Sanity Checks Passed!")
        self.assertTrue(
            np.allclose(o_t_target.numpy(), o_t_pred.numpy()),
            "combined_output should be:\n {} but is:\n{}".format(
                o_t_target, o_t_pred))
        print("combined_output  Sanity Checks Passed!")
        self.assertTrue(
            np.allclose(e_t_target.numpy(), e_t_pred.numpy()),
            "e_t should be:\n {} but is:\n{}".format(e_t_target, e_t_pred))
        print("e_t Sanity Checks Passed!")
Example #6
    def setUp(self):
        # Set Seeds
        random.seed(35436)
        np.random.seed(4355)
        torch.manual_seed(42)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(42)

        # Create Inputs
        input = setup()
        self.vocab = input[-1]

        # Initialize student model
        self.model = submission.NMT(embed_size=LARGE_EMBED_SIZE,
                                    hidden_size=LARGE_HIDDEN_SIZE,
                                    dropout_rate=NONZERO_DROPOUT_RATE,
                                    vocab=self.vocab)

        # Initialize soln model
        random.seed(35436)
        np.random.seed(4355)
        torch.manual_seed(42)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(42)
        self.soln_model = self.run_with_solution_if_possible(
            submission, lambda sub_or_sol: sub_or_sol).NMT(
                embed_size=LARGE_EMBED_SIZE,
                hidden_size=LARGE_HIDDEN_SIZE,
                dropout_rate=NONZERO_DROPOUT_RATE,
                vocab=self.vocab)

        # Initialize alternative soln model (change concat order in step function)
        # random.seed(35436)
        # np.random.seed(4355)
        # torch.manual_seed(42)
        # if torch.cuda.is_available():
        #     torch.cuda.manual_seed(42)
        # self.alt_soln_model = self.run_with_solution_if_possible(submission, lambda sub_or_sol:sub_or_sol).NMT_alt(
        #     embed_size = LARGE_EMBED_SIZE,
        #     hidden_size = LARGE_HIDDEN_SIZE,
        #     dropout_rate = NONZERO_DROPOUT_RATE,
        #     vocab = self.vocab
        # )

        # To prevent dropout
        self.model.train(False)
        self.soln_model.train(False)
        # self.alt_soln_model.train(False)

        # Generate Inputs
        random.seed(35436)
        np.random.seed(4355)
        torch.manual_seed(42)

        Ybar_t = torch.randn(LARGE_BATCH_SIZE,
                             LARGE_EMBED_SIZE + LARGE_HIDDEN_SIZE,
                             dtype=torch.float)
        dec_init_state = (torch.randn(LARGE_BATCH_SIZE,
                                      LARGE_HIDDEN_SIZE,
                                      dtype=torch.float),
                          torch.randn(LARGE_BATCH_SIZE,
                                      LARGE_HIDDEN_SIZE,
                                      dtype=torch.float))
        enc_hiddens = torch.randn(LARGE_BATCH_SIZE,
                                  20,
                                  LARGE_HIDDEN_SIZE * 2,
                                  dtype=torch.float)
        enc_hiddens_proj = torch.randn(LARGE_BATCH_SIZE,
                                       20,
                                       LARGE_HIDDEN_SIZE,
                                       dtype=torch.float)
        enc_masks = (torch.randn(LARGE_BATCH_SIZE, 20, dtype=torch.float) >=
                     0.5)

        self.dec_hidden_result, self.dec_state_result, self.o_t_result, self.e_t_result = \
            test_q1f(Ybar_t, dec_init_state, enc_hiddens, enc_hiddens_proj, enc_masks, self.model, self.soln_model)
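test_q1f is not shown either; given the four values unpacked from it, it presumably runs one decoder step on both models and compares the hidden state, cell state, combined output and attention scores. A hedged sketch under those assumptions:

def test_q1f(Ybar_t, dec_init_state, enc_hiddens, enc_hiddens_proj, enc_masks,
             model, soln_model):
    """Assumed sketch: run one step() on both models and compare the outputs."""
    model.load_state_dict(soln_model.state_dict())  # assumes matching parameter names
    with torch.no_grad():
        (h_s, c_s), o_s, e_s = model.step(
            Ybar_t, dec_init_state, enc_hiddens, enc_hiddens_proj, enc_masks)
        (h_t, c_t), o_t, e_t = soln_model.step(
            Ybar_t, dec_init_state, enc_hiddens, enc_hiddens_proj, enc_masks)
    return (torch.allclose(h_s, h_t, atol=1e-5),
            torch.allclose(c_s, c_t, atol=1e-5),
            torch.allclose(o_s, o_t, atol=1e-5),
            torch.allclose(e_s, e_t, atol=1e-5))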
Example #7
    def setUp(self):
        self.vocab = DummyVocab()
        self.student_result = submission.NMT(12,
                                             17,
                                             self.vocab,
                                             dropout_rate=0.34)
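DummyVocab is not defined in the snippet; it only needs to look enough like the real Vocab for NMT.__init__ to size its layers. A minimal assumed stand-in, using the .src / .tgt attributes seen in the other examples:

class DummyVocabEntry:
    """Tiny stand-in for one side of the vocabulary (sketch only)."""

    def __init__(self, size=50):
        self.size = size

    def __len__(self):
        return self.size


class DummyVocab:
    """Minimal vocabulary double; the real harness's DummyVocab may differ."""

    def __init__(self):
        self.src = DummyVocabEntry()
        self.tgt = DummyVocabEntry()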