def build_language_training_rnn(sess, hyper_params, prog_params, train_set, test_set):
    """Build, wire up and initialize a LanguageModel ready for training.

    Creates the model from `hyper_params`, builds tf.data pipelines for the
    train set (and test set when one is provided), constructs the training
    graph, attaches TensorBoard, restores any checkpoint and optionally
    overrides the learning rate from the command line.

    Args:
        sess: active tf.Session used to run iterator initializers and setup ops.
        hyper_params: dict of model/training hyper-parameters (see keys below).
        prog_params: dict of program options ("tb_name", "timeline", "learn_rate").
        train_set: list of training samples fed to `model.build_dataset`.
        test_set: list of validation samples; may be empty.

    Returns:
        (model, t_iterator, v_iterator) — v_iterator is None when test_set is empty.
    """
    model = LanguageModel(hyper_params["num_layers"], hyper_params["hidden_size"],
                          hyper_params["batch_size"], hyper_params["max_input_seq_length"],
                          hyper_params["max_target_seq_length"], hyper_params["char_map_length"])

    # Create a Dataset from the train_set and the test_set
    train_dataset = model.build_dataset(train_set, hyper_params["batch_size"],
                                        hyper_params["max_input_seq_length"],
                                        hyper_params["char_map"])

    v_iterator = None
    # BUG FIX: the original used `if test_set is []:` which is always False
    # (`is` compares identity and `[]` is a brand-new list object), so the
    # train-only branch could never be taken. Use truthiness instead.
    if not test_set:
        t_iterator = model.add_dataset_input(train_dataset)
        sess.run(t_iterator.initializer)
    else:
        test_dataset = model.build_dataset(test_set, hyper_params["batch_size"],
                                           hyper_params["max_input_seq_length"],
                                           hyper_params["char_map"])
        # Build the input stream from the different datasets
        t_iterator, v_iterator = model.add_datasets_input(train_dataset, test_dataset)
        sess.run(t_iterator.initializer)
        sess.run(v_iterator.initializer)

    # Create the training graph, fed from the iterator rather than placeholders
    model.create_training_rnn(hyper_params["dropout_input_keep_prob"],
                              hyper_params["dropout_output_keep_prob"],
                              hyper_params["grad_clip"], hyper_params["learning_rate"],
                              hyper_params["lr_decay_factor"], use_iterator=True)
    model.add_tensorboard(sess, hyper_params["tensorboard_dir"],
                          prog_params["tb_name"], prog_params["timeline"])
    model.initialize(sess)
    model.restore(sess, hyper_params["checkpoint_dir"] + "/language/")

    # Override the learning rate if given on the command line
    if prog_params["learn_rate"] is not None:
        model.set_learning_rate(sess, prog_params["learn_rate"])
    return model, t_iterator, v_iterator
def test_build_dataset(self):
    """Verify that build_dataset emits the expected one-hot inputs,
    sequence lengths and sparse label tensor for two sample sentences."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        model = LanguageModel(self.num_layers, self.hidden_size, self.batch_size,
                              self.max_input_seq_length, self.max_target_seq_length,
                              self.input_dim)
        # Build a dataset over two sentences of unequal length
        dataset = model.build_dataset(["the brown lazy fox", "the red quick fox"],
                                      self.batch_size, self.max_input_seq_length,
                                      ENGLISH_CHAR_MAP)
        it = dataset.make_initializable_iterator()
        sess.run(it.initializer)
        next_batch_op = it.get_next()
        inputs, lengths, labels = sess.run(next_batch_op)

        # Reconstruct the expected one-hot encodings for comparison; the
        # shorter sentence gets one row of zero padding appended.
        first = dataprocessor.DataProcessor.get_str_to_one_hot_encoded(
            ENGLISH_CHAR_MAP, "the brown lazy fox")
        second = dataprocessor.DataProcessor.get_str_to_one_hot_encoded(
            ENGLISH_CHAR_MAP, "the red quick fox")
        second.append(np.zeros(len(ENGLISH_CHAR_MAP)))
        expected = [first, second]

        # Check values
        np.testing.assert_array_equal(inputs, expected)
        np.testing.assert_array_equal(lengths, [18, 17])
        np.testing.assert_array_equal(
            labels[0],
            [[0, 0], [0, 1], [0, 2], [0, 3], [0, 4], [0, 5], [0, 6], [0, 7],
             [0, 8], [0, 9], [0, 10], [0, 11], [0, 12], [0, 13], [0, 14], [0, 15],
             [1, 0], [1, 1], [1, 2], [1, 3], [1, 4], [1, 5], [1, 6], [1, 7],
             [1, 8], [1, 9], [1, 10], [1, 11], [1, 12], [1, 13], [1, 14]])
        np.testing.assert_array_equal(
            labels[1],
            [33, 30, 53, 43, 40, 48, 39, 63, 26, 51, 50, 57, 40, 49, 79, 79,
             33, 30, 69, 30, 29, 68, 46, 34, 28, 36, 57, 40, 49, 79, 79])
        np.testing.assert_array_equal(labels[2], [2, 1800])
def test_create_training_rnn_with_iterators(self):
    """The training graph should build without error when its input
    comes from a dataset iterator (use_iterator=True)."""
    tf.reset_default_graph()
    with tf.Session():
        model = LanguageModel(self.num_layers, self.hidden_size, self.batch_size,
                              self.max_input_seq_length, self.max_target_seq_length,
                              self.input_dim)
        # Wire a tiny two-sentence dataset into the model as its input stream
        ds = model.build_dataset(["the brown lazy fox", "the red quick fox"],
                                 self.batch_size, self.max_input_seq_length,
                                 ENGLISH_CHAR_MAP)
        model.add_dataset_input(ds)
        model.create_training_rnn(self.input_keep_prob, self.output_keep_prob,
                                  self.grad_clip, self.learning_rate,
                                  self.lr_decay_factor, use_iterator=True)
def test_create_training_rnn_with_iterators(self):
    """Smoke test: building the training RNN from an iterator-backed
    dataset input must not raise."""
    tf.reset_default_graph()
    with tf.Session():
        lm = LanguageModel(self.num_layers, self.hidden_size, self.batch_size,
                           self.max_input_seq_length, self.max_target_seq_length,
                           self.input_dim)
        # Two short sentences are enough to exercise the dataset pipeline
        sentences = ["the brown lazy fox", "the red quick fox"]
        train_ds = lm.build_dataset(sentences, self.batch_size,
                                    self.max_input_seq_length, ENGLISH_CHAR_MAP)
        lm.add_dataset_input(train_ds)
        lm.create_training_rnn(self.input_keep_prob, self.output_keep_prob,
                               self.grad_clip, self.learning_rate,
                               self.lr_decay_factor, use_iterator=True)
def test_build_dataset(self):
    """Run one batch through the dataset pipeline and compare every
    component (inputs, lengths, sparse labels) against hand-built values."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        lm = LanguageModel(self.num_layers, self.hidden_size, self.batch_size,
                           self.max_input_seq_length, self.max_target_seq_length,
                           self.input_dim)
        # Dataset built from two sentences of lengths 18 and 17
        ds = lm.build_dataset(["the brown lazy fox", "the red quick fox"],
                              self.batch_size, self.max_input_seq_length,
                              ENGLISH_CHAR_MAP)
        iterator = ds.make_initializable_iterator()
        sess.run(iterator.initializer)
        batch = sess.run(iterator.get_next())
        inputs, seq_lengths, sparse_labels = batch

        # Hand-build the expected one-hot batch; the 17-char sentence is
        # padded with one all-zero row so both rows have equal length.
        encode = dataprocessor.DataProcessor.get_str_to_one_hot_encoded
        long_sentence = encode(ENGLISH_CHAR_MAP, "the brown lazy fox")
        short_sentence = encode(ENGLISH_CHAR_MAP, "the red quick fox")
        short_sentence.append(np.zeros(len(ENGLISH_CHAR_MAP)))
        expected = [long_sentence, short_sentence]

        # Check values
        np.testing.assert_array_equal(inputs, expected)
        np.testing.assert_array_equal(seq_lengths, [18, 17])
        np.testing.assert_array_equal(
            sparse_labels[0],
            [[0, 0], [0, 1], [0, 2], [0, 3], [0, 4], [0, 5], [0, 6], [0, 7],
             [0, 8], [0, 9], [0, 10], [0, 11], [0, 12], [0, 13], [0, 14], [0, 15],
             [1, 0], [1, 1], [1, 2], [1, 3], [1, 4], [1, 5], [1, 6], [1, 7],
             [1, 8], [1, 9], [1, 10], [1, 11], [1, 12], [1, 13], [1, 14]])
        np.testing.assert_array_equal(
            sparse_labels[1],
            [33, 30, 53, 43, 40, 48, 39, 63, 26, 51, 50, 57, 40, 49, 79, 79,
             33, 30, 69, 30, 29, 68, 46, 34, 28, 36, 57, 40, 49, 79, 79])
        np.testing.assert_array_equal(sparse_labels[2], [2, 1800])