示例#1
0
 def test_create_training_rnn(self):
     """Smoke test: building the training RNN graph must not raise."""
     tf.reset_default_graph()
     with tf.Session():
         # Hyper-parameters come from the test fixture attributes
         model = AcousticModel(self.num_layers, self.hidden_size, self.batch_size, self.max_input_seq_length,
                               self.max_target_seq_length, self.input_dim, self.normalization, self.num_labels)
         # Building the training graph is the operation under test
         model.create_training_rnn(self.input_keep_prob, self.output_keep_prob, self.grad_clip,
                                   self.learning_rate, self.lr_decay_factor)
示例#2
0
def build_acoustic_training_rnn(is_mpi, is_chief, hyper_params, prog_params, train_set, test_set):
    """Build an AcousticModel wired for training.

    Creates the model from ``hyper_params``, builds the input pipeline for the
    train set (and the test set when one is provided) and constructs the
    training graph. Only the chief worker attaches tensorboard logging.

    Args:
        is_mpi: whether training runs under MPI (forwarded to the model).
        is_chief: whether this worker is the chief.
        hyper_params: dict of model/dataset hyper-parameters.
        prog_params: dict of program options (``train_dir``, ``timeline``,
            ``is_sync``).
        train_set: training corpus passed to ``model.build_dataset``.
        test_set: optional evaluation corpus; empty means train-only.

    Returns:
        (model, train_iterator, validation_iterator) where the validation
        iterator is None when no test set was given.
    """
    model = AcousticModel(hyper_params["num_layers"], hyper_params["hidden_size"], hyper_params["batch_size"],
                          hyper_params["max_input_seq_length"], hyper_params["max_target_seq_length"],
                          hyper_params["input_dim"], hyper_params["batch_normalization"],
                          hyper_params["char_map_length"])

    # Create a Dataset from the train_set and the test_set
    train_dataset = model.build_dataset(train_set, hyper_params["batch_size"], hyper_params["max_input_seq_length"],
                                        hyper_params["max_target_seq_length"], hyper_params["signal_processing"],
                                        hyper_params["char_map"])
    v_iterator = None
    # BUG FIX: the original tested ``test_set is []`` which is always False
    # (identity comparison against a fresh list object); test emptiness instead.
    if not test_set:
        t_iterator = model.add_dataset_input(train_dataset)
    else:
        test_dataset = model.build_dataset(test_set, hyper_params["batch_size"], hyper_params["max_input_seq_length"],
                                           hyper_params["max_target_seq_length"], hyper_params["signal_processing"],
                                           hyper_params["char_map"])
        # Build the input stream from the different datasets
        t_iterator, v_iterator = model.add_datasets_input(train_dataset, test_dataset)

    # Create the training graph fed by the iterator(s)
    model.create_training_rnn(is_mpi, hyper_params["dropout_input_keep_prob"], hyper_params["dropout_output_keep_prob"],
                              hyper_params["grad_clip"], hyper_params["learning_rate"],
                              hyper_params["lr_decay_factor"], use_iterator=True, is_sync=prog_params['is_sync'],
                              is_chief=is_chief)

    # Only the chief worker writes tensorboard summaries / timelines
    if is_chief:
        model.add_tensorboard(prog_params["train_dir"], prog_params["timeline"])
    return model, t_iterator, v_iterator
示例#3
0
 def test_create_forward_rnn(self):
     """Smoke test: building the forward-only RNN graph must not raise."""
     tf.reset_default_graph()
     with tf.Session():
         # Hyper-parameters come from the test fixture attributes
         model = AcousticModel(self.num_layers, self.hidden_size,
                               self.batch_size, self.max_input_seq_length,
                               self.max_target_seq_length, self.input_dim,
                               self.normalization, self.num_labels)
         # Building the inference graph is the operation under test
         model.create_forward_rnn()
示例#4
0
 def test_create_training_rnn(self):
     """Smoke test: building the training RNN graph must not raise."""
     tf.reset_default_graph()
     with tf.Session():
         # Hyper-parameters come from the test fixture attributes
         model = AcousticModel(self.num_layers, self.hidden_size,
                               self.batch_size, self.max_input_seq_length,
                               self.max_target_seq_length, self.input_dim,
                               self.normalization, self.num_labels)
         # Building the training graph is the operation under test
         model.create_training_rnn(self.input_keep_prob,
                                   self.output_keep_prob, self.grad_clip,
                                   self.learning_rate, self.lr_decay_factor)
示例#5
0
def process_file(audio_processor, hyper_params, file):
    """Transcribe a single audio file with the saved acoustic model and print it.

    The feature vector is zero-padded up to ``max_input_seq_length``; files
    producing more frames than that are skipped with a warning.

    Args:
        audio_processor: object exposing ``process_audio_file(file)``.
        hyper_params: dict of model hyper-parameters and ``checkpoint_dir``.
        file: path of the audio file to transcribe.
    """
    feat_vec, original_feat_vec_length = audio_processor.process_audio_file(file)
    if original_feat_vec_length > hyper_params["max_input_seq_length"]:
        logging.warning("File too long")
        return
    elif original_feat_vec_length < hyper_params["max_input_seq_length"]:
        # Pad the feat_vec with zeros.
        pad_length = hyper_params["max_input_seq_length"] - original_feat_vec_length
        # FIX: ``np.float`` was deprecated and removed in NumPy 1.24;
        # ``np.float64`` is the explicit equivalent.
        padding = np.zeros((pad_length, hyper_params["input_dim"]), dtype=np.float64)
        feat_vec = np.concatenate((feat_vec, padding), 0)

    with tf.Session() as sess:
        # create model (batch size 1: we decode a single file)
        model = AcousticModel(hyper_params["num_layers"], hyper_params["hidden_size"], 1,
                              hyper_params["max_input_seq_length"], hyper_params["max_target_seq_length"],
                              hyper_params["input_dim"], hyper_params["batch_normalization"],
                              hyper_params["char_map_length"])
        model.create_forward_rnn()
        model.initialize(sess)
        model.restore(sess, hyper_params["checkpoint_dir"] + "/acoustic/")

        # Insert a batch axis of size 1: [time, features] -> [time, 1, features]
        (a, b) = feat_vec.shape
        feat_vec = feat_vec.reshape((a, 1, b))
        predictions = model.process_input(sess, feat_vec, [original_feat_vec_length])
        # Decode label indices back to text via the char map
        transcribed_text = [dataprocessor.DataProcessor.get_labels_str(hyper_params["char_map"], prediction)
                            for prediction in predictions]
        print(transcribed_text[0])
示例#6
0
def record_and_write(audio_processor, hyper_params):
    """Record audio from the default microphone and print transcriptions live.

    Captures fixed-size chunks from a pyaudio input stream, runs each chunk
    through the forward-only acoustic RNN and prints the partial result.
    Loops forever; interrupt the process to stop.

    Args:
        audio_processor: object exposing ``process_signal(data, sample_rate)``.
        hyper_params: dict of model hyper-parameters and ``checkpoint_dir``.
    """
    import pyaudio
    _CHUNK = hyper_params["max_input_seq_length"]
    _SR = 22050  # capture sample rate
    p = pyaudio.PyAudio()

    with tf.Session() as sess:
        # create model (batch size 1: the live stream is decoded chunk by chunk)
        model = AcousticModel(hyper_params["num_layers"], hyper_params["hidden_size"], 1,
                              hyper_params["max_input_seq_length"], hyper_params["max_target_seq_length"],
                              hyper_params["input_dim"], hyper_params["batch_normalization"],
                              language=hyper_params["language"])

        model.create_forward_rnn()
        model.initialize(sess)
        model.restore(sess, hyper_params["checkpoint_dir"])

        # Create stream of listening
        stream = p.open(format=pyaudio.paInt16, channels=1, rate=_SR, input=True, frames_per_buffer=_CHUNK)
        print("NOW RECORDING...")

        while True:
            data = stream.read(_CHUNK)
            # FIX: np.fromstring is deprecated/removed; frombuffer is the
            # drop-in replacement (returns a read-only view of the bytes).
            # NOTE(review): the stream delivers paInt16 samples but the default
            # dtype here is float64 — confirm the intended dtype.
            data = np.frombuffer(data)
            feat_vec, original_feat_vec_length = audio_processor.process_signal(data, _SR)
            # Insert a batch axis of size 1: [time, features] -> [time, 1, features]
            (a, b) = feat_vec.shape
            feat_vec = feat_vec.reshape((a, 1, b))
            result = model.process_input(sess, feat_vec, [original_feat_vec_length])
            print(result, end="")
示例#7
0
def evaluate(hyper_params):
    """Compute and print WER/CER of the saved acoustic model on the test set."""
    if hyper_params["test_dataset_dirs"] is None:
        logging.fatal("Setting test_dataset_dirs in config file is mandatory for evaluation mode")
        return

    # Collect the evaluation files from the configured directories
    processor = dataprocessor.DataProcessor(hyper_params["test_dataset_dirs"])
    eval_set = processor.get_dataset()

    logging.info("Using %d size of test set", len(eval_set))

    if not eval_set:
        logging.fatal("No files in test set during an evaluation mode")
        return

    with tf.Session() as session:
        # Build a forward-only model and load the latest checkpoint
        acoustic = AcousticModel(hyper_params["num_layers"], hyper_params["hidden_size"],
                                 hyper_params["batch_size"], hyper_params["max_input_seq_length"],
                                 hyper_params["max_target_seq_length"], hyper_params["input_dim"],
                                 hyper_params["batch_normalization"], language=hyper_params["language"])
        acoustic.create_forward_rnn()
        acoustic.initialize(session)
        acoustic.restore(session, hyper_params["checkpoint_dir"])

        word_err, char_err = acoustic.evaluate_full(session, eval_set,
                                                    hyper_params["max_input_seq_length"],
                                                    hyper_params["signal_processing"])
        print("Resulting WER : {0:.3g} %".format(word_err))
        print("Resulting CER : {0:.3g} %".format(char_err))
        return
示例#8
0
文件: stt.py 项目: inikdom/rnn-speech
def process_file(audio_processor, hyper_params, file):
    """Transcribe a single audio file with the saved acoustic model and print it.

    The feature vector is zero-padded up to ``max_input_seq_length``; files
    producing more frames than that are skipped with a warning.

    Args:
        audio_processor: object exposing ``process_audio_file(file)``.
        hyper_params: dict of model hyper-parameters and ``checkpoint_dir``.
        file: path of the audio file to transcribe.
    """
    feat_vec, original_feat_vec_length = audio_processor.process_audio_file(file)
    if original_feat_vec_length > hyper_params["max_input_seq_length"]:
        logging.warning("File too long")
        return
    elif original_feat_vec_length < hyper_params["max_input_seq_length"]:
        # Pad the feat_vec with zeros.
        pad_length = hyper_params["max_input_seq_length"] - original_feat_vec_length
        # FIX: ``np.float`` was deprecated and removed in NumPy 1.24;
        # ``np.float64`` is the explicit equivalent.
        padding = np.zeros((pad_length, hyper_params["input_dim"]), dtype=np.float64)
        feat_vec = np.concatenate((feat_vec, padding), 0)

    with tf.Session() as sess:
        # create model (batch size 1: we decode a single file)
        model = AcousticModel(hyper_params["num_layers"], hyper_params["hidden_size"], 1,
                              hyper_params["max_input_seq_length"], hyper_params["max_target_seq_length"],
                              hyper_params["input_dim"], hyper_params["batch_normalization"],
                              hyper_params["char_map_length"])
        model.create_forward_rnn()
        model.initialize(sess)
        model.restore(sess, hyper_params["checkpoint_dir"] + "/acoustic/")

        # Insert a batch axis of size 1: [time, features] -> [time, 1, features]
        (a, b) = feat_vec.shape
        feat_vec = feat_vec.reshape((a, 1, b))
        predictions = model.process_input(sess, feat_vec, [original_feat_vec_length])
        # Decode label indices back to text via the char map
        transcribed_text = [dataprocessor.DataProcessor.get_labels_str(hyper_params["char_map"], prediction)
                            for prediction in predictions]
        print(transcribed_text[0])
示例#9
0
文件: stt.py 项目: inikdom/rnn-speech
def evaluate(hyper_params):
    """Print WER and CER of the checkpointed acoustic model on the test set."""
    if hyper_params["test_dataset_dirs"] is None:
        logging.fatal("Setting test_dataset_dirs in config file is mandatory for evaluation mode")
        return

    # Collect the evaluation files from the configured directories
    processor = dataprocessor.DataProcessor(hyper_params["test_dataset_dirs"])
    eval_set = processor.get_dataset()

    logging.info("Using %d size of test set", len(eval_set))

    if not eval_set:
        logging.fatal("No files in test set during an evaluation mode")
        return

    with tf.Session() as session:
        # Forward-only model restored from the acoustic checkpoint
        acoustic = AcousticModel(hyper_params["num_layers"], hyper_params["hidden_size"],
                                 hyper_params["batch_size"], hyper_params["max_input_seq_length"],
                                 hyper_params["max_target_seq_length"], hyper_params["input_dim"],
                                 hyper_params["batch_normalization"], hyper_params["char_map_length"])
        acoustic.create_forward_rnn()
        acoustic.initialize(session)
        acoustic.restore(session, hyper_params["checkpoint_dir"] + "/acoustic/")

        word_err, char_err = acoustic.evaluate_full(session, eval_set,
                                                    hyper_params["max_input_seq_length"],
                                                    hyper_params["signal_processing"],
                                                    hyper_params["char_map"])
        print("Resulting WER : {0:.3g} %".format(word_err))
        print("Resulting CER : {0:.3g} %".format(char_err))
        return
示例#10
0
文件: stt.py 项目: inikdom/rnn-speech
def record_and_write(audio_processor, hyper_params):
    """Record from the microphone and print decoded transcriptions live.

    Captures fixed-size chunks from a pyaudio input stream, runs each chunk
    through the forward-only acoustic RNN and prints the decoded labels.
    Loops forever; interrupt the process to stop.

    Args:
        audio_processor: object exposing ``process_signal(data, sample_rate)``.
        hyper_params: dict of model hyper-parameters and ``checkpoint_dir``.
    """
    import pyaudio
    _CHUNK = hyper_params["max_input_seq_length"]
    _SR = 22050  # capture sample rate
    p = pyaudio.PyAudio()

    with tf.Session() as sess:
        # create model (batch size 1: the live stream is decoded chunk by chunk)
        model = AcousticModel(hyper_params["num_layers"], hyper_params["hidden_size"], 1,
                              hyper_params["max_input_seq_length"], hyper_params["max_target_seq_length"],
                              hyper_params["input_dim"], hyper_params["batch_normalization"],
                              hyper_params["char_map_length"])

        model.create_forward_rnn()
        model.initialize(sess)
        model.restore(sess, hyper_params["checkpoint_dir"] + "/acoustic/")

        # Create stream of listening
        stream = p.open(format=pyaudio.paInt16, channels=1, rate=_SR, input=True, frames_per_buffer=_CHUNK)
        print("NOW RECORDING...")

        while True:
            data = stream.read(_CHUNK)
            # FIX: np.fromstring is deprecated/removed; frombuffer is the
            # drop-in replacement (returns a read-only view of the bytes).
            # NOTE(review): the stream delivers paInt16 samples but the default
            # dtype here is float64 — confirm the intended dtype.
            data = np.frombuffer(data)
            feat_vec, original_feat_vec_length = audio_processor.process_signal(data, _SR)
            # Insert a batch axis of size 1: [time, features] -> [time, 1, features]
            (a, b) = feat_vec.shape
            feat_vec = feat_vec.reshape((a, 1, b))
            predictions = model.process_input(sess, feat_vec, [original_feat_vec_length])
            # Decode label indices back to text via the char map
            result = [dataprocessor.DataProcessor.get_labels_str(hyper_params["char_map"], prediction)
                      for prediction in predictions]
            print(result, end="")
示例#11
0
def createAcousticModel(session,
                        hyper_params,
                        batch_size,
                        forward_only=True,
                        tensorboard_dir=None,
                        tb_run_name=None):
    """Build an AcousticModel and initialize or restore its variables.

    Args:
        session: TensorFlow session used to run the init/restore ops.
        hyper_params: dict of model hyper-parameters including checkpoint_dir.
        batch_size: mini-batch size of the constructed model.
        forward_only: build an inference-only graph when True.
        tensorboard_dir: optional tensorboard log directory.
        tb_run_name: optional tensorboard run name.

    Returns:
        The constructed AcousticModel, restored from the latest checkpoint
        when one is available, otherwise freshly initialized.
    """
    num_labels = 31   # size of the output alphabet
    input_dim = 123   # dimensionality of each input feature frame
    model = AcousticModel(session,
                          num_labels,
                          hyper_params["num_layers"],
                          hyper_params["hidden_size"],
                          hyper_params["dropout"],
                          batch_size,
                          hyper_params["learning_rate"],
                          hyper_params["lr_decay_factor"],
                          hyper_params["grad_clip"],
                          hyper_params["max_input_seq_length"],
                          hyper_params["max_target_seq_length"],
                          input_dim,
                          forward_only=forward_only,
                          tensorboard_dir=tensorboard_dir,
                          tb_run_name=tb_run_name)
    ckpt = tf.train.get_checkpoint_state(hyper_params["checkpoint_dir"])
    # Initialize variables. FIX: tf.initialize_all_variables() was deprecated
    # and later removed; global_variables_initializer is the replacement and
    # is what the rest of this file already uses.
    session.run(tf.global_variables_initializer())
    # Restore from checkpoint (will overwrite variables)
    if ckpt and gfile.Exists(ckpt.model_checkpoint_path):
        print("Reading model parameters from {0}".format(
            ckpt.model_checkpoint_path))
        model.saver.restore(session, ckpt.model_checkpoint_path)
    else:
        print("Created model with fresh parameters.")
    return model
示例#12
0
def process_file(audio_processor, hyper_params, file):
    """Transcribe a single audio file with the saved acoustic model and print it.

    NOTE(review): unlike the padding variants of ``process_file`` elsewhere in
    this file, this one rejects over-long files but does not pad short ones —
    confirm the model accepts variable-length input.

    Args:
        audio_processor: object exposing ``process_audio_file(file)``.
        hyper_params: dict of model hyper-parameters and ``checkpoint_dir``.
        file: path of the audio file to transcribe.
    """
    feat_vec, original_feat_vec_length = audio_processor.process_audio_file(file)
    if original_feat_vec_length > hyper_params["max_input_seq_length"]:
        logging.warning("File too long")
        return

    with tf.Session() as sess:
        # create model (batch size 1: we decode a single file)
        model = AcousticModel(hyper_params["num_layers"], hyper_params["hidden_size"], 1,
                              hyper_params["max_input_seq_length"], hyper_params["max_target_seq_length"],
                              hyper_params["input_dim"], hyper_params["batch_normalization"],
                              language=hyper_params["language"])
        model.create_forward_rnn()
        model.initialize(sess)
        model.restore(sess, hyper_params["checkpoint_dir"])

        # Insert a batch axis of size 1: [time, features] -> [time, 1, features]
        (a, b) = feat_vec.shape
        feat_vec = feat_vec.reshape((a, 1, b))
        transcribed_text = model.process_input(sess, feat_vec, [original_feat_vec_length])
        print(transcribed_text[0])
示例#13
0
    def test_create_training_rnn_with_iterators(self):
        """Smoke test: the training RNN must build when fed by a dataset iterator."""
        tf.reset_default_graph()

        with tf.Session():
            model = AcousticModel(self.num_layers, self.hidden_size, self.batch_size, self.max_input_seq_length,
                                  self.max_target_seq_length, self.input_dim, self.normalization, self.num_labels)

            # Create a Dataset from the train_set and the test_set
            # (a single dummy file entry is enough to build the pipeline)
            train_dataset = model.build_dataset([["/file/path", "Test", 10]], self.batch_size,
                                                self.max_input_seq_length, self.max_target_seq_length,
                                                self.signal_processing, ENGLISH_CHAR_MAP)
            model.add_dataset_input(train_dataset)
            # Building the iterator-fed training graph is the operation under test
            model.create_training_rnn(self.input_keep_prob, self.output_keep_prob, self.grad_clip,
                                      self.learning_rate, self.lr_decay_factor, use_iterator=True)
示例#14
0
def create_acoustic_model(session,
                          hyper_params,
                          batch_size,
                          forward_only=True,
                          tensorboard_dir=None,
                          tb_run_name=None,
                          timeline_enabled=False):
    """Build an AcousticModel and initialize or restore its variables.

    Args:
        session: TensorFlow session used to run the init/restore ops.
        hyper_params: dict of model hyper-parameters including checkpoint_dir.
        batch_size: mini-batch size of the constructed model.
        forward_only: build an inference-only graph when True.
        tensorboard_dir: optional tensorboard log directory.
        tb_run_name: optional tensorboard run name.
        timeline_enabled: whether to record execution timelines.

    Returns:
        The constructed AcousticModel, restored from the latest checkpoint
        when one exists, freshly initialized otherwise.
    """
    # Size of the output alphabet; fixed for this project
    num_labels = 31
    logging.info("Building model... (this takes a while)")
    model = AcousticModel(session,
                          num_labels,
                          hyper_params["num_layers"],
                          hyper_params["hidden_size"],
                          hyper_params["dropout_input_keep_prob"],
                          hyper_params["dropout_output_keep_prob"],
                          batch_size,
                          hyper_params["learning_rate"],
                          hyper_params["lr_decay_factor"],
                          hyper_params["grad_clip"],
                          hyper_params["max_input_seq_length"],
                          hyper_params["max_target_seq_length"],
                          hyper_params["input_dim"],
                          hyper_params["batch_normalization"],
                          forward_only=forward_only,
                          tensorboard_dir=tensorboard_dir,
                          tb_run_name=tb_run_name,
                          timeline_enabled=timeline_enabled)
    ckpt = tf.train.get_checkpoint_state(hyper_params["checkpoint_dir"])
    # Initialize variables
    session.run(tf.global_variables_initializer())
    # Restore from checkpoint (will overwrite variables)
    if ckpt:
        logging.info("Reading model parameters from %s",
                     ckpt.model_checkpoint_path)
        model.saver.restore(session, ckpt.model_checkpoint_path)
    else:
        logging.info("Created model with fresh parameters.")
    return model
示例#15
0
    def test_create_training_rnn_with_iterators(self):
        """Smoke test: the training RNN must build when fed by a dataset iterator."""
        tf.reset_default_graph()

        with tf.Session():
            model = AcousticModel(self.num_layers, self.hidden_size,
                                  self.batch_size, self.max_input_seq_length,
                                  self.max_target_seq_length, self.input_dim,
                                  self.normalization, self.num_labels)

            # Create a Dataset from the train_set and the test_set
            # (a single dummy file entry is enough to build the pipeline)
            train_dataset = model.build_dataset([["/file/path", "Test", 10]],
                                                self.batch_size,
                                                self.max_input_seq_length,
                                                self.max_target_seq_length,
                                                self.signal_processing,
                                                ENGLISH_CHAR_MAP)
            model.add_dataset_input(train_dataset)
            # Building the iterator-fed training graph is the operation under test
            model.create_training_rnn(self.input_keep_prob,
                                      self.output_keep_prob,
                                      self.grad_clip,
                                      self.learning_rate,
                                      self.lr_decay_factor,
                                      use_iterator=True)
示例#16
0
文件: stt.py 项目: inikdom/rnn-speech
def build_acoustic_training_rnn(sess, hyper_params, prog_params, train_set, test_set):
    """Build an AcousticModel ready for training inside ``sess``.

    Constructs the model, its dataset iterator(s), the iterator-fed training
    graph and tensorboard logging, restores the latest acoustic checkpoint
    and applies a command-line learning-rate override when given.

    Args:
        sess: TensorFlow session used to initialize iterators and variables.
        hyper_params: dict of model/dataset hyper-parameters.
        prog_params: dict of program options (``tb_name``, ``timeline``,
            ``learn_rate``).
        train_set: training corpus passed to ``model.build_dataset``.
        test_set: optional evaluation corpus; empty means train-only.

    Returns:
        (model, train_iterator, validation_iterator); the validation iterator
        is None when ``test_set`` is empty.
    """
    model = AcousticModel(hyper_params["num_layers"], hyper_params["hidden_size"], hyper_params["batch_size"],
                          hyper_params["max_input_seq_length"], hyper_params["max_target_seq_length"],
                          hyper_params["input_dim"], hyper_params["batch_normalization"],
                          hyper_params["char_map_length"])

    # Create a Dataset from the train_set and the test_set
    train_dataset = model.build_dataset(train_set, hyper_params["batch_size"], hyper_params["max_input_seq_length"],
                                        hyper_params["max_target_seq_length"], hyper_params["signal_processing"],
                                        hyper_params["char_map"])

    v_iterator = None
    # BUG FIX: ``test_set is []`` is always False (identity comparison with a
    # fresh list object); test for emptiness instead.
    if not test_set:
        t_iterator = model.add_dataset_input(train_dataset)
        sess.run(t_iterator.initializer)
    else:
        test_dataset = model.build_dataset(test_set, hyper_params["batch_size"], hyper_params["max_input_seq_length"],
                                           hyper_params["max_target_seq_length"], hyper_params["signal_processing"],
                                           hyper_params["char_map"])

        # Build the input stream from the different datasets
        t_iterator, v_iterator = model.add_datasets_input(train_dataset, test_dataset)
        sess.run(t_iterator.initializer)
        sess.run(v_iterator.initializer)

    # Create the training graph fed by the iterator(s)
    model.create_training_rnn(hyper_params["dropout_input_keep_prob"], hyper_params["dropout_output_keep_prob"],
                              hyper_params["grad_clip"], hyper_params["learning_rate"],
                              hyper_params["lr_decay_factor"], use_iterator=True)
    model.add_tensorboard(sess, hyper_params["tensorboard_dir"], prog_params["tb_name"], prog_params["timeline"])
    model.initialize(sess)
    model.restore(sess, hyper_params["checkpoint_dir"] + "/acoustic/")

    # Override the learning rate if given on the command line
    if prog_params["learn_rate"] is not None:
        model.set_learning_rate(sess, prog_params["learn_rate"])

    return model, t_iterator, v_iterator
示例#17
0
def build_acoustic_training_rnn(is_chief, is_ditributed, sess, hyper_params,
                                prog_params, train_set, test_set):
    """Build an AcousticModel for (optionally distributed) training.

    Args:
        is_chief: whether this worker is the chief.
        is_ditributed: whether training is distributed. NOTE(review): the name
            is a typo for "distributed" but is kept — renaming would break
            keyword-argument callers.
        sess: TensorFlow session (used for non-distributed init/restore).
        hyper_params: dict of model/dataset hyper-parameters.
        prog_params: dict of program options (``train_dir``, ``timeline``,
            ``learn_rate``).
        train_set: training corpus passed to ``model.build_dataset``.
        test_set: optional evaluation corpus; empty means train-only.

    Returns:
        (supervisor, model, train_iterator, validation_iterator); the
        supervisor is None unless training is distributed, and the validation
        iterator is None when ``test_set`` is empty.
    """
    model = AcousticModel(
        hyper_params["num_layers"], hyper_params["hidden_size"],
        hyper_params["batch_size"], hyper_params["max_input_seq_length"],
        hyper_params["max_target_seq_length"], hyper_params["input_dim"],
        hyper_params["batch_normalization"], hyper_params["char_map_length"])

    # Create a Dataset from the train_set and the test_set
    train_dataset = model.build_dataset(train_set, hyper_params["batch_size"],
                                        hyper_params["max_input_seq_length"],
                                        hyper_params["max_target_seq_length"],
                                        hyper_params["signal_processing"],
                                        hyper_params["char_map"])
    train_dataset = train_dataset.shuffle(10, reshuffle_each_iteration=True)
    v_iterator = None
    # BUG FIX: the original compared ``test_set is []`` which is always False
    # (identity comparison with a fresh list object); test emptiness instead.
    if not test_set:
        t_iterator = model.add_dataset_input(train_dataset)
    else:
        test_dataset = model.build_dataset(
            test_set, hyper_params["batch_size"],
            hyper_params["max_input_seq_length"],
            hyper_params["max_target_seq_length"],
            hyper_params["signal_processing"], hyper_params["char_map"])
        # Build the input stream from the different datasets
        t_iterator, v_iterator = model.add_datasets_input(
            train_dataset, test_dataset)

    # Create the training graph fed by the iterator(s)
    model.create_training_rnn(is_chief,
                              is_ditributed,
                              hyper_params["dropout_input_keep_prob"],
                              hyper_params["dropout_output_keep_prob"],
                              hyper_params["grad_clip"],
                              hyper_params["learning_rate"],
                              hyper_params["lr_decay_factor"],
                              use_iterator=True)
    model.add_tensorboard(sess, prog_params["train_dir"],
                          prog_params["timeline"])
    sv = None
    if is_ditributed:
        # Distributed runs delegate variable init/recovery to a Supervisor
        init_op = tf.global_variables_initializer()
        sv = tf.train.Supervisor(is_chief=is_chief,
                                 logdir=prog_params["train_dir"],
                                 init_op=init_op,
                                 recovery_wait_secs=1,
                                 summary_op=None,
                                 global_step=model.global_step)
        model.supervisor = sv
    else:
        model.initialize(sess)
        model.restore(sess, prog_params["train_dir"])

    # Override the learning rate if given on the command line
    if prog_params["learn_rate"] is not None:
        model.set_learning_rate(sess, prog_params["learn_rate"])

    return sv, model, t_iterator, v_iterator
 def setUpClass(cls):
     """Build one shared AcousticModel for the whole test class.

     NOTE(review): this looks like a unittest ``setUpClass`` and would
     normally carry a ``@classmethod`` decorator — confirm against the
     full file.
     """
     with tf.Session() as sess:
         # Positional values are fixture hyper-parameters (layer count, sizes,
         # dropout keep-probs, learning rate, sequence lengths, ...); their
         # exact meaning depends on this AcousticModel signature — confirm
         # against the model's constructor.
         cls.model = AcousticModel(sess, 2, 50, 0.8, 0.5, 3, 0.0003, 0.33, 5, 1800, 600, 120, False,
                                   forward_only=False, tensorboard_dir=None, tb_run_name=None,
                                   timeline_enabled=False, language='english')
示例#19
0
def build_training_rnn(sess, hyper_params, prog_params, train_set, test_set):
    """Build an AcousticModel wired for training inside ``sess``.

    Creates the model, the dataset iterator(s), the iterator-fed training
    graph and tensorboard logging, restores the latest checkpoint and applies
    a command-line learning-rate override when given.

    Args:
        sess: TensorFlow session used to initialize iterators and variables.
        hyper_params: dict of model/dataset hyper-parameters.
        prog_params: dict of program options (``tb_name``, ``timeline``,
            ``learn_rate``).
        train_set: training corpus passed to ``model.build_dataset``.
        test_set: optional evaluation corpus; empty means train-only.

    Returns:
        (model, train_iterator, validation_iterator); the validation iterator
        is None when ``test_set`` is empty.
    """
    model = AcousticModel(hyper_params["num_layers"], hyper_params["hidden_size"], hyper_params["batch_size"],
                          hyper_params["max_input_seq_length"], hyper_params["max_target_seq_length"],
                          hyper_params["input_dim"], hyper_params["batch_normalization"],
                          language=hyper_params["language"])

    # Create a Dataset from the train_set and the test_set
    train_dataset = model.build_dataset(train_set, hyper_params["batch_size"], hyper_params["max_input_seq_length"],
                                        hyper_params["max_target_seq_length"], hyper_params["signal_processing"])

    v_iterator = None
    # BUG FIX: ``test_set is []`` is always False (identity comparison with a
    # fresh list object); use truthiness to detect an empty test set.
    if not test_set:
        t_iterator = model.add_dataset_input(train_dataset)
        sess.run(t_iterator.initializer)
    else:
        test_dataset = model.build_dataset(test_set, hyper_params["batch_size"], hyper_params["max_input_seq_length"],
                                           hyper_params["max_target_seq_length"], hyper_params["signal_processing"])

        # Build the input stream from the different datasets
        t_iterator, v_iterator = model.add_datasets_input(train_dataset, test_dataset)
        sess.run(t_iterator.initializer)
        sess.run(v_iterator.initializer)

    # Create the training graph fed by the iterator(s)
    model.create_training_rnn(hyper_params["dropout_input_keep_prob"], hyper_params["dropout_output_keep_prob"],
                              hyper_params["grad_clip"], hyper_params["learning_rate"],
                              hyper_params["lr_decay_factor"], use_iterator=True)
    model.add_tensorboard(sess, hyper_params["tensorboard_dir"], prog_params["tb_name"], prog_params["timeline"])
    model.initialize(sess)
    model.restore(sess, hyper_params["checkpoint_dir"])

    # Override the learning rate if given on the command line
    if prog_params["learn_rate"] is not None:
        model.set_learning_rate(sess, prog_params["learn_rate"])

    return model, t_iterator, v_iterator
示例#20
0
 def test_create_forward_rnn(self):
     """Smoke test: building the forward-only RNN graph must not raise."""
     tf.reset_default_graph()
     with tf.Session():
         # Hyper-parameters come from the test fixture attributes
         model = AcousticModel(self.num_layers, self.hidden_size, self.batch_size, self.max_input_seq_length,
                               self.max_target_seq_length, self.input_dim, self.normalization, self.num_labels)
         # Building the inference graph is the operation under test
         model.create_forward_rnn()