示例#1
0
文件: stt.py 项目: inikdom/rnn-speech
def build_language_training_rnn(sess, hyper_params, prog_params, train_set, test_set):
    """
    Build a LanguageModel set up for training and wire its dataset input(s).

    :param sess: session used to run the iterator initializers and to initialize / restore the model
    :param hyper_params: dict read for the model sizes, "char_map", dropout, gradient clipping, learning rate,
                         "tensorboard_dir" and "checkpoint_dir" settings
    :param prog_params: dict read for "tb_name", "timeline" and the optional "learn_rate" override
    :param train_set: data handed to ``model.build_dataset`` to create the training dataset
    :param test_set: data for the validation dataset; when empty (or None) only the training input is built
    :return: tuple ``(model, t_iterator, v_iterator)``; ``v_iterator`` is None when no test set was given
    """
    model = LanguageModel(hyper_params["num_layers"], hyper_params["hidden_size"], hyper_params["batch_size"],
                          hyper_params["max_input_seq_length"], hyper_params["max_target_seq_length"],
                          hyper_params["char_map_length"])

    # Create a Dataset from the train_set
    train_dataset = model.build_dataset(train_set, hyper_params["batch_size"], hyper_params["max_input_seq_length"],
                                        hyper_params["char_map"])

    v_iterator = None
    # Truthiness check: an identity comparison against a list literal (`is []`) can never be True,
    # which made the train-only branch unreachable.
    if not test_set:
        t_iterator = model.add_dataset_input(train_dataset)
        sess.run(t_iterator.initializer)
    else:
        test_dataset = model.build_dataset(test_set, hyper_params["batch_size"], hyper_params["max_input_seq_length"],
                                           hyper_params["char_map"])

        # Build the input stream from the different datasets
        t_iterator, v_iterator = model.add_datasets_input(train_dataset, test_dataset)
        sess.run(t_iterator.initializer)
        sess.run(v_iterator.initializer)

    # Create the model
    model.create_training_rnn(hyper_params["dropout_input_keep_prob"], hyper_params["dropout_output_keep_prob"],
                              hyper_params["grad_clip"], hyper_params["learning_rate"],
                              hyper_params["lr_decay_factor"], use_iterator=True)
    model.add_tensorboard(sess, hyper_params["tensorboard_dir"], prog_params["tb_name"], prog_params["timeline"])
    model.initialize(sess)
    model.restore(sess, hyper_params["checkpoint_dir"] + "/language/")

    # Override the learning rate if given on the command line
    if prog_params["learn_rate"] is not None:
        model.set_learning_rate(sess, prog_params["learn_rate"])

    return model, t_iterator, v_iterator
示例#2
0
def build_language_training_rnn(sess, hyper_params, prog_params, train_set, test_set):
    """
    Create a training LanguageModel, attach its input iterator(s) and restore any checkpoint.

    :param sess: session used to run the iterator initializers and to initialize / restore the model
    :param hyper_params: dict providing the model dimensions, "char_map", dropout keep probabilities, "grad_clip",
                         learning-rate settings, "tensorboard_dir" and "checkpoint_dir"
    :param prog_params: dict providing "tb_name", "timeline" and the optional "learn_rate" override
    :param train_set: data from which the training dataset is built
    :param test_set: data from which the validation dataset is built; if empty (or None) it is skipped
    :return: tuple ``(model, t_iterator, v_iterator)``; ``v_iterator`` stays None without a test set
    """
    model = LanguageModel(hyper_params["num_layers"], hyper_params["hidden_size"], hyper_params["batch_size"],
                          hyper_params["max_input_seq_length"], hyper_params["max_target_seq_length"],
                          hyper_params["char_map_length"])

    # Create a Dataset from the train_set
    train_dataset = model.build_dataset(train_set, hyper_params["batch_size"], hyper_params["max_input_seq_length"],
                                        hyper_params["char_map"])

    v_iterator = None
    # Test emptiness by truthiness; `test_set is []` compares identity with a brand-new list
    # and is therefore always False, so the train-only path was never taken.
    if not test_set:
        t_iterator = model.add_dataset_input(train_dataset)
        sess.run(t_iterator.initializer)
    else:
        test_dataset = model.build_dataset(test_set, hyper_params["batch_size"], hyper_params["max_input_seq_length"],
                                           hyper_params["char_map"])

        # Build the input stream from the different datasets
        t_iterator, v_iterator = model.add_datasets_input(train_dataset, test_dataset)
        sess.run(t_iterator.initializer)
        sess.run(v_iterator.initializer)

    # Create the model
    model.create_training_rnn(hyper_params["dropout_input_keep_prob"], hyper_params["dropout_output_keep_prob"],
                              hyper_params["grad_clip"], hyper_params["learning_rate"],
                              hyper_params["lr_decay_factor"], use_iterator=True)
    model.add_tensorboard(sess, hyper_params["tensorboard_dir"], prog_params["tb_name"], prog_params["timeline"])
    model.initialize(sess)
    model.restore(sess, hyper_params["checkpoint_dir"] + "/language/")

    # Override the learning rate if given on the command line
    if prog_params["learn_rate"] is not None:
        model.set_learning_rate(sess, prog_params["learn_rate"])

    return model, t_iterator, v_iterator
示例#3
0
    def test_build_dataset(self):
        """Check the first batch produced by ``build_dataset`` for two short sentences."""
        tf.reset_default_graph()

        with tf.Session() as sess:
            sentences = ["the brown lazy fox", "the red quick fox"]
            model = LanguageModel(self.num_layers, self.hidden_size, self.batch_size, self.max_input_seq_length,
                                  self.max_target_seq_length, self.input_dim)

            # Build the dataset from the two sentences and pull one element out of it
            dataset = model.build_dataset(sentences, self.batch_size, self.max_input_seq_length, ENGLISH_CHAR_MAP)
            iterator = dataset.make_initializable_iterator()
            sess.run(iterator.initializer)
            next_element = iterator.get_next()
            inputs, input_lengths, labels = sess.run(next_element)

            # Rebuild the expected one-hot inputs; the second sentence is one character shorter,
            # so a single all-zero row is appended as padding
            to_one_hot = dataprocessor.DataProcessor.get_str_to_one_hot_encoded
            first_encoded = to_one_hot(ENGLISH_CHAR_MAP, "the brown lazy fox")
            second_encoded = to_one_hot(ENGLISH_CHAR_MAP, "the red quick fox")
            second_encoded.append(np.zeros(len(ENGLISH_CHAR_MAP)))
            expected_inputs = [first_encoded, second_encoded]

            # Check values
            np.testing.assert_array_equal(inputs, expected_inputs)
            np.testing.assert_array_equal(input_lengths, [18, 17])

            # Label indices: 16 entries for row 0 followed by 15 entries for row 1
            expected_indices = [[0, col] for col in range(16)] + [[1, col] for col in range(15)]
            np.testing.assert_array_equal(labels[0], expected_indices)

            expected_values = [33, 30, 53, 43, 40, 48, 39, 63, 26, 51, 50, 57, 40, 49, 79, 79,
                               33, 30, 69, 30, 29, 68, 46, 34, 28, 36, 57, 40, 49, 79, 79]
            np.testing.assert_array_equal(labels[1], expected_values)
            np.testing.assert_array_equal(labels[2], [2, 1800])
示例#4
0
    def test_create_training_rnn_with_iterators(self):
        """Smoke test: a training RNN can be created on top of a dataset iterator input."""
        tf.reset_default_graph()

        with tf.Session():
            sentences = ["the brown lazy fox", "the red quick fox"]
            model = LanguageModel(self.num_layers, self.hidden_size, self.batch_size, self.max_input_seq_length,
                                  self.max_target_seq_length, self.input_dim)

            # Feed the model from a dataset built out of the sample sentences
            dataset = model.build_dataset(sentences, self.batch_size, self.max_input_seq_length, ENGLISH_CHAR_MAP)
            model.add_dataset_input(dataset)

            # Building the training graph must not raise
            model.create_training_rnn(self.input_keep_prob, self.output_keep_prob, self.grad_clip,
                                      self.learning_rate, self.lr_decay_factor, use_iterator=True)
示例#5
0
    def test_create_training_rnn_with_iterators(self):
        """Verify that the training graph builds when the model reads from an iterator."""
        tf.reset_default_graph()

        with tf.Session():
            model = LanguageModel(
                self.num_layers, self.hidden_size, self.batch_size,
                self.max_input_seq_length, self.max_target_seq_length,
                self.input_dim)

            # Create the training dataset from two sample sentences
            sample_text = ["the brown lazy fox", "the red quick fox"]
            sample_dataset = model.build_dataset(
                sample_text, self.batch_size, self.max_input_seq_length,
                ENGLISH_CHAR_MAP)
            model.add_dataset_input(sample_dataset)

            # The test passes as long as graph construction does not raise
            model.create_training_rnn(
                self.input_keep_prob, self.output_keep_prob, self.grad_clip,
                self.learning_rate, self.lr_decay_factor, use_iterator=True)
示例#6
0
    def test_build_dataset(self):
        """Validate inputs, lengths and sparse labels of the first dataset batch."""
        tf.reset_default_graph()

        with tf.Session() as sess:
            model = LanguageModel(
                self.num_layers, self.hidden_size, self.batch_size,
                self.max_input_seq_length, self.max_target_seq_length,
                self.input_dim)

            # Build the dataset from two sentences and fetch its first element
            corpus = ["the brown lazy fox", "the red quick fox"]
            dataset = model.build_dataset(
                corpus, self.batch_size, self.max_input_seq_length,
                ENGLISH_CHAR_MAP)
            iterator = dataset.make_initializable_iterator()
            sess.run(iterator.initializer)
            fetch_op = iterator.get_next()
            batch_inputs, batch_lengths, batch_labels = sess.run(fetch_op)

            # Rebuild the expected one-hot inputs for comparison
            encode = dataprocessor.DataProcessor.get_str_to_one_hot_encoded
            long_sentence = encode(ENGLISH_CHAR_MAP, "the brown lazy fox")
            short_sentence = encode(ENGLISH_CHAR_MAP, "the red quick fox")
            # The shorter sentence gets one all-zero padding row
            short_sentence.append(np.zeros(len(ENGLISH_CHAR_MAP)))
            expected_inputs = [long_sentence, short_sentence]

            # Check values
            np.testing.assert_array_equal(batch_inputs, expected_inputs)
            np.testing.assert_array_equal(batch_lengths, [18, 17])

            # Label indices: row 0 has 16 positions, row 1 has 15
            expected_indices = [[0, pos] for pos in range(16)]
            expected_indices += [[1, pos] for pos in range(15)]
            np.testing.assert_array_equal(batch_labels[0], expected_indices)

            expected_values = [
                33, 30, 53, 43, 40, 48, 39, 63, 26, 51, 50, 57, 40, 49, 79, 79,
                33, 30, 69, 30, 29, 68, 46, 34, 28, 36, 57, 40, 49, 79, 79
            ]
            np.testing.assert_array_equal(batch_labels[1], expected_values)
            np.testing.assert_array_equal(batch_labels[2], [2, 1800])