def test_create_training_rnn(self):
    """Smoke test: creating the training RNN on a fresh graph must not raise.

    The test makes no assertions; it passes as long as constructing the
    model and calling ``create_training_rnn`` complete without an exception.
    """
    tf.reset_default_graph()
    with tf.Session():
        constructor_args = (
            self.num_layers,
            self.hidden_size,
            self.batch_size,
            self.max_input_seq_length,
            self.max_target_seq_length,
            self.input_dim,
            self.normalization,
            self.num_labels,
        )
        acoustic_model = AcousticModel(*constructor_args)

        training_args = (
            self.input_keep_prob,
            self.output_keep_prob,
            self.grad_clip,
            self.learning_rate,
            self.lr_decay_factor,
        )
        acoustic_model.create_training_rnn(*training_args)
def build_acoustic_training_rnn(is_mpi, is_chief, hyper_params, prog_params, train_set, test_set):
    """Build an AcousticModel wired for training from dataset iterators.

    Args:
        is_mpi: Forwarded as the first argument of ``model.create_training_rnn``.
        is_chief: Forwarded to ``create_training_rnn``; when truthy, tensorboard
            output is also attached to the model.
        hyper_params: Dict of model hyper-parameters (layer sizes, sequence
            lengths, dropout, learning rate, char map, ...).
        prog_params: Dict of program options; reads ``is_sync``, ``train_dir``
            and ``timeline``.
        train_set: Training samples passed to ``model.build_dataset``.
        test_set: Validation samples; may be empty or ``None``.

    Returns:
        Tuple ``(model, t_iterator, v_iterator)``. ``v_iterator`` is ``None``
        when no test set is supplied.
    """
    model = AcousticModel(
        hyper_params["num_layers"],
        hyper_params["hidden_size"],
        hyper_params["batch_size"],
        hyper_params["max_input_seq_length"],
        hyper_params["max_target_seq_length"],
        hyper_params["input_dim"],
        hyper_params["batch_normalization"],
        hyper_params["char_map_length"],
    )

    # Create a Dataset from the train_set and the test_set
    train_dataset = model.build_dataset(
        train_set,
        hyper_params["batch_size"],
        hyper_params["max_input_seq_length"],
        hyper_params["max_target_seq_length"],
        hyper_params["signal_processing"],
        hyper_params["char_map"],
    )
    # NOTE: shuffling (train_dataset.shuffle(10, reshuffle_each_iteration=True))
    # is currently disabled in this variant.

    v_iterator = None
    # The previous check was `test_set is []`, an identity comparison against
    # a brand-new list that can never be true; test for emptiness instead.
    if not test_set:
        t_iterator = model.add_dataset_input(train_dataset)
    else:
        test_dataset = model.build_dataset(
            test_set,
            hyper_params["batch_size"],
            hyper_params["max_input_seq_length"],
            hyper_params["max_target_seq_length"],
            hyper_params["signal_processing"],
            hyper_params["char_map"],
        )
        # Build the input stream from the different datasets
        t_iterator, v_iterator = model.add_datasets_input(train_dataset, test_dataset)

    # Create the model
    model.create_training_rnn(
        is_mpi,
        hyper_params["dropout_input_keep_prob"],
        hyper_params["dropout_output_keep_prob"],
        hyper_params["grad_clip"],
        hyper_params["learning_rate"],
        hyper_params["lr_decay_factor"],
        use_iterator=True,
        is_sync=prog_params['is_sync'],
        is_chief=is_chief,
    )
    # Only the chief attaches tensorboard output.
    if is_chief:
        model.add_tensorboard(prog_params["train_dir"], prog_params["timeline"])
    return model, t_iterator, v_iterator
def test_create_forward_rnn(self):
    """Smoke test: creating the forward-only RNN on a fresh graph must not raise.

    No assertions are made; the test passes when model construction and
    ``create_forward_rnn`` finish without an exception.
    """
    tf.reset_default_graph()
    with tf.Session():
        constructor_args = (
            self.num_layers,
            self.hidden_size,
            self.batch_size,
            self.max_input_seq_length,
            self.max_target_seq_length,
            self.input_dim,
            self.normalization,
            self.num_labels,
        )
        acoustic_model = AcousticModel(*constructor_args)
        acoustic_model.create_forward_rnn()
def process_file(audio_processor, hyper_params, file):
    """Transcribe a single audio file and print the resulting text.

    The file is converted to a feature matrix, zero-padded up to
    ``hyper_params["max_input_seq_length"]`` rows, and run through a
    forward-only AcousticModel restored from
    ``hyper_params["checkpoint_dir"] + "/acoustic/"``.

    Args:
        audio_processor: Object exposing ``process_audio_file(file)``, which
            returns ``(feat_vec, original_feat_vec_length)``.
        hyper_params: Dict of model hyper-parameters.
        file: Audio file handed to ``audio_processor``.

    Returns:
        None. When the file has more feature frames than
        ``max_input_seq_length`` a warning is logged and nothing is printed.
    """
    feat_vec, original_feat_vec_length = audio_processor.process_audio_file(file)
    max_length = hyper_params["max_input_seq_length"]

    if original_feat_vec_length > max_length:
        logging.warning("File too long")
        return
    elif original_feat_vec_length < max_length:
        # Pad the feat_vec with zeros
        pad_length = max_length - original_feat_vec_length
        # np.float was a deprecated alias of the builtin float (removed in
        # NumPy 1.24); np.float64 is the same dtype and works on every release.
        padding = np.zeros((pad_length, hyper_params["input_dim"]), dtype=np.float64)
        feat_vec = np.concatenate((feat_vec, padding), 0)

    with tf.Session() as sess:
        # create model (batch size is 1: a single file is processed)
        model = AcousticModel(
            hyper_params["num_layers"],
            hyper_params["hidden_size"],
            1,
            hyper_params["max_input_seq_length"],
            hyper_params["max_target_seq_length"],
            hyper_params["input_dim"],
            hyper_params["batch_normalization"],
            hyper_params["char_map_length"],
        )
        model.create_forward_rnn()
        model.initialize(sess)
        model.restore(sess, hyper_params["checkpoint_dir"] + "/acoustic/")

        # Insert a middle axis of size 1 (the batch of one file).
        (a, b) = feat_vec.shape
        feat_vec = feat_vec.reshape((a, 1, b))
        predictions = model.process_input(sess, feat_vec, [original_feat_vec_length])
        transcribed_text = [
            dataprocessor.DataProcessor.get_labels_str(hyper_params["char_map"], prediction)
            for prediction in predictions
        ]
        print(transcribed_text[0])
def record_and_write(audio_processor, hyper_params):
    """Continuously record from the default input device and print transcriptions.

    A forward-only AcousticModel is restored from
    ``hyper_params["checkpoint_dir"]``; audio is then read in chunks of
    ``max_input_seq_length`` frames at 22050 Hz, converted to features by
    ``audio_processor.process_signal`` and fed to the model. The loop only
    ends through an exception (e.g. KeyboardInterrupt).

    Args:
        audio_processor: Object exposing ``process_signal(data, sample_rate)``,
            which returns ``(feat_vec, original_feat_vec_length)``.
        hyper_params: Dict of model hyper-parameters.
    """
    import pyaudio

    _CHUNK = hyper_params["max_input_seq_length"]
    _SR = 22050
    p = pyaudio.PyAudio()

    try:
        with tf.Session() as sess:
            # create model (batch size is 1: one chunk at a time)
            model = AcousticModel(
                hyper_params["num_layers"],
                hyper_params["hidden_size"],
                1,
                hyper_params["max_input_seq_length"],
                hyper_params["max_target_seq_length"],
                hyper_params["input_dim"],
                hyper_params["batch_normalization"],
                language=hyper_params["language"],
            )
            model.create_forward_rnn()
            model.initialize(sess)
            model.restore(sess, hyper_params["checkpoint_dir"])

            # Create stream of listening
            stream = p.open(format=pyaudio.paInt16, channels=1, rate=_SR, input=True,
                            frames_per_buffer=_CHUNK)
            try:
                print("NOW RECORDING...")
                while True:
                    data = stream.read(_CHUNK)
                    # The stream delivers 16-bit PCM (paInt16). The previous
                    # np.fromstring(data) used the deprecated binary mode and
                    # decoded the bytes as float64, producing garbage samples.
                    # NOTE(review): confirm process_signal accepts int16 input.
                    data = np.frombuffer(data, dtype=np.int16)
                    feat_vec, original_feat_vec_length = audio_processor.process_signal(data, _SR)
                    # Insert a middle axis of size 1 (the batch of one chunk).
                    (a, b) = feat_vec.shape
                    feat_vec = feat_vec.reshape((a, 1, b))
                    result = model.process_input(sess, feat_vec, [original_feat_vec_length])
                    print(result, end="")
            finally:
                # Release the audio device even when the loop is interrupted.
                stream.stop_stream()
                stream.close()
    finally:
        p.terminate()
def evaluate(hyper_params):
    """Evaluate a restored acoustic model on the configured test set.

    Loads the test set from ``hyper_params["test_dataset_dirs"]``, restores a
    forward-only AcousticModel from ``hyper_params["checkpoint_dir"]`` and
    prints the word and character error rates returned by
    ``model.evaluate_full``.

    Args:
        hyper_params: Dict of model hyper-parameters and dataset settings.

    Returns:
        None. A fatal message is logged and the function returns early when
        ``test_dataset_dirs`` is None or the loaded test set is empty.
    """
    test_dirs = hyper_params["test_dataset_dirs"]
    if test_dirs is None:
        logging.fatal("Setting test_dataset_dirs in config file is mandatory for evaluation mode")
        return None

    # Load the test set data
    test_set = dataprocessor.DataProcessor(test_dirs).get_dataset()
    sample_count = len(test_set)
    logging.info("Using %d size of test set", sample_count)
    if sample_count == 0:
        logging.fatal("No files in test set during an evaluation mode")
        return None

    with tf.Session() as sess:
        # Build the forward-only network, then load the trained weights.
        model = AcousticModel(
            hyper_params["num_layers"],
            hyper_params["hidden_size"],
            hyper_params["batch_size"],
            hyper_params["max_input_seq_length"],
            hyper_params["max_target_seq_length"],
            hyper_params["input_dim"],
            hyper_params["batch_normalization"],
            language=hyper_params["language"],
        )
        model.create_forward_rnn()
        model.initialize(sess)
        model.restore(sess, hyper_params["checkpoint_dir"])

        error_rates = model.evaluate_full(
            sess,
            test_set,
            hyper_params["max_input_seq_length"],
            hyper_params["signal_processing"],
        )
        wer, cer = error_rates
        print("Resulting WER : {0:.3g} %".format(wer))
        print("Resulting CER : {0:.3g} %".format(cer))
    return None
def evaluate(hyper_params):
    """Evaluate a restored acoustic model on the configured test set.

    Loads the test set from ``hyper_params["test_dataset_dirs"]``, restores a
    forward-only AcousticModel from
    ``hyper_params["checkpoint_dir"] + "/acoustic/"`` and prints the word and
    character error rates returned by ``model.evaluate_full``.

    Args:
        hyper_params: Dict of model hyper-parameters, dataset settings and
            the character map.

    Returns:
        None. A fatal message is logged and the function returns early when
        ``test_dataset_dirs`` is None or the loaded test set is empty.
    """
    test_dirs = hyper_params["test_dataset_dirs"]
    if test_dirs is None:
        logging.fatal("Setting test_dataset_dirs in config file is mandatory for evaluation mode")
        return None

    # Load the test set data
    test_set = dataprocessor.DataProcessor(test_dirs).get_dataset()
    sample_count = len(test_set)
    logging.info("Using %d size of test set", sample_count)
    if sample_count == 0:
        logging.fatal("No files in test set during an evaluation mode")
        return None

    with tf.Session() as sess:
        # Build the forward-only network, then load the trained weights.
        model = AcousticModel(
            hyper_params["num_layers"],
            hyper_params["hidden_size"],
            hyper_params["batch_size"],
            hyper_params["max_input_seq_length"],
            hyper_params["max_target_seq_length"],
            hyper_params["input_dim"],
            hyper_params["batch_normalization"],
            hyper_params["char_map_length"],
        )
        model.create_forward_rnn()
        model.initialize(sess)
        model.restore(sess, hyper_params["checkpoint_dir"] + "/acoustic/")

        error_rates = model.evaluate_full(
            sess,
            test_set,
            hyper_params["max_input_seq_length"],
            hyper_params["signal_processing"],
            hyper_params["char_map"],
        )
        wer, cer = error_rates
        print("Resulting WER : {0:.3g} %".format(wer))
        print("Resulting CER : {0:.3g} %".format(cer))
    return None
def record_and_write(audio_processor, hyper_params):
    """Continuously record from the default input device and print transcriptions.

    A forward-only AcousticModel is restored from
    ``hyper_params["checkpoint_dir"] + "/acoustic/"``; audio is then read in
    chunks of ``max_input_seq_length`` frames at 22050 Hz, converted to
    features by ``audio_processor.process_signal``, run through the model and
    decoded to text with ``hyper_params["char_map"]``. The loop only ends
    through an exception (e.g. KeyboardInterrupt).

    Args:
        audio_processor: Object exposing ``process_signal(data, sample_rate)``,
            which returns ``(feat_vec, original_feat_vec_length)``.
        hyper_params: Dict of model hyper-parameters and the character map.
    """
    import pyaudio

    _CHUNK = hyper_params["max_input_seq_length"]
    _SR = 22050
    p = pyaudio.PyAudio()

    try:
        with tf.Session() as sess:
            # create model (batch size is 1: one chunk at a time)
            model = AcousticModel(
                hyper_params["num_layers"],
                hyper_params["hidden_size"],
                1,
                hyper_params["max_input_seq_length"],
                hyper_params["max_target_seq_length"],
                hyper_params["input_dim"],
                hyper_params["batch_normalization"],
                hyper_params["char_map_length"],
            )
            model.create_forward_rnn()
            model.initialize(sess)
            model.restore(sess, hyper_params["checkpoint_dir"] + "/acoustic/")

            # Create stream of listening
            stream = p.open(format=pyaudio.paInt16, channels=1, rate=_SR, input=True,
                            frames_per_buffer=_CHUNK)
            try:
                print("NOW RECORDING...")
                while True:
                    data = stream.read(_CHUNK)
                    # The stream delivers 16-bit PCM (paInt16). The previous
                    # np.fromstring(data) used the deprecated binary mode and
                    # decoded the bytes as float64, producing garbage samples.
                    # NOTE(review): confirm process_signal accepts int16 input.
                    data = np.frombuffer(data, dtype=np.int16)
                    feat_vec, original_feat_vec_length = audio_processor.process_signal(data, _SR)
                    # Insert a middle axis of size 1 (the batch of one chunk).
                    (a, b) = feat_vec.shape
                    feat_vec = feat_vec.reshape((a, 1, b))
                    predictions = model.process_input(sess, feat_vec, [original_feat_vec_length])
                    result = [
                        dataprocessor.DataProcessor.get_labels_str(hyper_params["char_map"], prediction)
                        for prediction in predictions
                    ]
                    print(result, end="")
            finally:
                # Release the audio device even when the loop is interrupted.
                stream.stop_stream()
                stream.close()
    finally:
        p.terminate()
def createAcousticModel(session, hyper_params, batch_size, forward_only=True, tensorboard_dir=None,
                        tb_run_name=None):
    """Build an AcousticModel and load its weights from the latest checkpoint.

    All variables are initialised first; when a checkpoint state is found in
    ``hyper_params["checkpoint_dir"]`` and its path exists, the saved values
    then overwrite the fresh ones.

    Args:
        session: TensorFlow session used to build, initialise and restore.
        hyper_params: Dict of model hyper-parameters.
        batch_size: Batch size handed to the model.
        forward_only: Forwarded to the AcousticModel constructor.
        tensorboard_dir: Forwarded to the AcousticModel constructor.
        tb_run_name: Forwarded to the AcousticModel constructor.

    Returns:
        The constructed AcousticModel.
    """
    # Fixed sizes used by this variant of the model.
    num_labels = 31
    input_dim = 123

    model = AcousticModel(
        session,
        num_labels,
        hyper_params["num_layers"],
        hyper_params["hidden_size"],
        hyper_params["dropout"],
        batch_size,
        hyper_params["learning_rate"],
        hyper_params["lr_decay_factor"],
        hyper_params["grad_clip"],
        hyper_params["max_input_seq_length"],
        hyper_params["max_target_seq_length"],
        input_dim,
        forward_only=forward_only,
        tensorboard_dir=tensorboard_dir,
        tb_run_name=tb_run_name,
    )
    checkpoint = tf.train.get_checkpoint_state(hyper_params["checkpoint_dir"])

    # Initialize variables (a later restore overwrites them)
    session.run(tf.initialize_all_variables())

    if not (checkpoint and gfile.Exists(checkpoint.model_checkpoint_path)):
        print("Created model with fresh parameters.")
        return model

    checkpoint_path = checkpoint.model_checkpoint_path
    print("Reading model parameters from {0}".format(checkpoint_path))
    model.saver.restore(session, checkpoint_path)
    return model
def process_file(audio_processor, hyper_params, file):
    """Transcribe a single audio file and print the first result.

    The file is converted to a feature matrix and run through a forward-only
    AcousticModel restored from ``hyper_params["checkpoint_dir"]``.

    Args:
        audio_processor: Object exposing ``process_audio_file(file)``, which
            returns ``(feat_vec, original_feat_vec_length)``.
        hyper_params: Dict of model hyper-parameters.
        file: Audio file handed to ``audio_processor``.

    Returns:
        None. When the file has more feature frames than
        ``max_input_seq_length`` a warning is logged and nothing is printed.
    """
    features, frame_count = audio_processor.process_audio_file(file)
    too_long = frame_count > hyper_params["max_input_seq_length"]
    if too_long:
        logging.warning("File too long")
        return None

    with tf.Session() as sess:
        # Batch size is 1: a single file is processed.
        model = AcousticModel(
            hyper_params["num_layers"],
            hyper_params["hidden_size"],
            1,
            hyper_params["max_input_seq_length"],
            hyper_params["max_target_seq_length"],
            hyper_params["input_dim"],
            hyper_params["batch_normalization"],
            language=hyper_params["language"],
        )
        model.create_forward_rnn()
        model.initialize(sess)
        model.restore(sess, hyper_params["checkpoint_dir"])

        # Insert a middle axis of size 1 (the batch of one file).
        rows, cols = features.shape
        batched = features.reshape((rows, 1, cols))
        transcribed_text = model.process_input(sess, batched, [frame_count])
        print(transcribed_text[0])
    return None
def test_create_training_rnn_with_iterators(self):
    """Smoke test: the training RNN can be created on top of a dataset iterator.

    No assertions are made; the test passes when the dataset is built, wired
    in as the model input and ``create_training_rnn(use_iterator=True)``
    completes without an exception.
    """
    tf.reset_default_graph()
    with tf.Session():
        acoustic_model = AcousticModel(
            self.num_layers,
            self.hidden_size,
            self.batch_size,
            self.max_input_seq_length,
            self.max_target_seq_length,
            self.input_dim,
            self.normalization,
            self.num_labels,
        )

        # Single-sample dataset description handed to build_dataset.
        samples = [["/file/path", "Test", 10]]
        dataset = acoustic_model.build_dataset(
            samples,
            self.batch_size,
            self.max_input_seq_length,
            self.max_target_seq_length,
            self.signal_processing,
            ENGLISH_CHAR_MAP,
        )
        acoustic_model.add_dataset_input(dataset)

        acoustic_model.create_training_rnn(
            self.input_keep_prob,
            self.output_keep_prob,
            self.grad_clip,
            self.learning_rate,
            self.lr_decay_factor,
            use_iterator=True,
        )
def create_acoustic_model(session, hyper_params, batch_size, forward_only=True, tensorboard_dir=None,
                          tb_run_name=None, timeline_enabled=False):
    """Build an AcousticModel and load its weights from the latest checkpoint.

    All variables are initialised first; when a checkpoint state is found in
    ``hyper_params["checkpoint_dir"]`` the saved values then overwrite them.

    Args:
        session: TensorFlow session used to build, initialise and restore.
        hyper_params: Dict of model hyper-parameters.
        batch_size: Batch size handed to the model.
        forward_only: Forwarded to the AcousticModel constructor.
        tensorboard_dir: Forwarded to the AcousticModel constructor.
        tb_run_name: Forwarded to the AcousticModel constructor.
        timeline_enabled: Forwarded to the AcousticModel constructor.

    Returns:
        The constructed AcousticModel.
    """
    num_labels = 31

    logging.info("Building model... (this takes a while)")
    model = AcousticModel(
        session,
        num_labels,
        hyper_params["num_layers"],
        hyper_params["hidden_size"],
        hyper_params["dropout_input_keep_prob"],
        hyper_params["dropout_output_keep_prob"],
        batch_size,
        hyper_params["learning_rate"],
        hyper_params["lr_decay_factor"],
        hyper_params["grad_clip"],
        hyper_params["max_input_seq_length"],
        hyper_params["max_target_seq_length"],
        hyper_params["input_dim"],
        hyper_params["batch_normalization"],
        forward_only=forward_only,
        tensorboard_dir=tensorboard_dir,
        tb_run_name=tb_run_name,
        timeline_enabled=timeline_enabled,
    )
    checkpoint = tf.train.get_checkpoint_state(hyper_params["checkpoint_dir"])

    # Initialize variables (a later restore overwrites them)
    session.run(tf.global_variables_initializer())

    if not checkpoint:
        logging.info("Created model with fresh parameters.")
        return model

    checkpoint_path = checkpoint.model_checkpoint_path
    logging.info("Reading model parameters from %s", checkpoint_path)
    model.saver.restore(session, checkpoint_path)
    return model
def build_acoustic_training_rnn(sess, hyper_params, prog_params, train_set, test_set):
    """Build, initialise and restore an AcousticModel ready for training.

    Args:
        sess: TensorFlow session used to initialise the iterators and the
            model and to restore the checkpoint.
        hyper_params: Dict of model hyper-parameters (layer sizes, sequence
            lengths, dropout, learning rate, char map, directories, ...).
        prog_params: Dict of program options; reads ``tb_name``, ``timeline``
            and ``learn_rate``.
        train_set: Training samples passed to ``model.build_dataset``.
        test_set: Validation samples; may be empty or ``None``.

    Returns:
        Tuple ``(model, t_iterator, v_iterator)``. ``v_iterator`` is ``None``
        when no test set is supplied.
    """
    model = AcousticModel(
        hyper_params["num_layers"],
        hyper_params["hidden_size"],
        hyper_params["batch_size"],
        hyper_params["max_input_seq_length"],
        hyper_params["max_target_seq_length"],
        hyper_params["input_dim"],
        hyper_params["batch_normalization"],
        hyper_params["char_map_length"],
    )

    # Create a Dataset from the train_set and the test_set
    train_dataset = model.build_dataset(
        train_set,
        hyper_params["batch_size"],
        hyper_params["max_input_seq_length"],
        hyper_params["max_target_seq_length"],
        hyper_params["signal_processing"],
        hyper_params["char_map"],
    )

    v_iterator = None
    # The previous check was `test_set is []`, an identity comparison against
    # a brand-new list that can never be true; test for emptiness instead.
    if not test_set:
        t_iterator = model.add_dataset_input(train_dataset)
        sess.run(t_iterator.initializer)
    else:
        test_dataset = model.build_dataset(
            test_set,
            hyper_params["batch_size"],
            hyper_params["max_input_seq_length"],
            hyper_params["max_target_seq_length"],
            hyper_params["signal_processing"],
            hyper_params["char_map"],
        )
        # Build the input stream from the different datasets
        t_iterator, v_iterator = model.add_datasets_input(train_dataset, test_dataset)
        sess.run(t_iterator.initializer)
        sess.run(v_iterator.initializer)

    # Create the model
    model.create_training_rnn(
        hyper_params["dropout_input_keep_prob"],
        hyper_params["dropout_output_keep_prob"],
        hyper_params["grad_clip"],
        hyper_params["learning_rate"],
        hyper_params["lr_decay_factor"],
        use_iterator=True,
    )
    model.add_tensorboard(sess, hyper_params["tensorboard_dir"], prog_params["tb_name"],
                          prog_params["timeline"])
    model.initialize(sess)
    model.restore(sess, hyper_params["checkpoint_dir"] + "/acoustic/")

    # Override the learning rate if given on the command line
    if prog_params["learn_rate"] is not None:
        model.set_learning_rate(sess, prog_params["learn_rate"])

    return model, t_iterator, v_iterator
def build_acoustic_training_rnn(is_chief, is_ditributed, sess, hyper_params, prog_params, train_set,
                                test_set):
    """Build an AcousticModel for local or distributed training.

    Args:
        is_chief: Forwarded to ``create_training_rnn`` and, in distributed
            mode, to ``tf.train.Supervisor``.
        is_ditributed: When truthy a ``tf.train.Supervisor`` is created and
            stored on ``model.supervisor``; otherwise the model is initialised
            and restored directly through ``sess``. (The parameter name is
            kept as-is for caller compatibility.)
        sess: TensorFlow session.
        hyper_params: Dict of model hyper-parameters.
        prog_params: Dict of program options; reads ``train_dir``,
            ``timeline`` and ``learn_rate``.
        train_set: Training samples passed to ``model.build_dataset``.
        test_set: Validation samples; may be empty or ``None``.

    Returns:
        Tuple ``(sv, model, t_iterator, v_iterator)``. ``sv`` is ``None`` in
        non-distributed mode and ``v_iterator`` is ``None`` when no test set
        is supplied.
    """
    model = AcousticModel(
        hyper_params["num_layers"],
        hyper_params["hidden_size"],
        hyper_params["batch_size"],
        hyper_params["max_input_seq_length"],
        hyper_params["max_target_seq_length"],
        hyper_params["input_dim"],
        hyper_params["batch_normalization"],
        hyper_params["char_map_length"],
    )

    # Create a Dataset from the train_set and the test_set
    train_dataset = model.build_dataset(
        train_set,
        hyper_params["batch_size"],
        hyper_params["max_input_seq_length"],
        hyper_params["max_target_seq_length"],
        hyper_params["signal_processing"],
        hyper_params["char_map"],
    )
    train_dataset = train_dataset.shuffle(10, reshuffle_each_iteration=True)

    v_iterator = None
    # The previous check was `test_set is []`, an identity comparison against
    # a brand-new list that can never be true; test for emptiness instead.
    if not test_set:
        t_iterator = model.add_dataset_input(train_dataset)
    else:
        test_dataset = model.build_dataset(
            test_set,
            hyper_params["batch_size"],
            hyper_params["max_input_seq_length"],
            hyper_params["max_target_seq_length"],
            hyper_params["signal_processing"],
            hyper_params["char_map"],
        )
        # Build the input stream from the different datasets
        t_iterator, v_iterator = model.add_datasets_input(train_dataset, test_dataset)

    # Create the model
    model.create_training_rnn(
        is_chief,
        is_ditributed,
        hyper_params["dropout_input_keep_prob"],
        hyper_params["dropout_output_keep_prob"],
        hyper_params["grad_clip"],
        hyper_params["learning_rate"],
        hyper_params["lr_decay_factor"],
        use_iterator=True,
    )
    model.add_tensorboard(sess, prog_params["train_dir"], prog_params["timeline"])

    sv = None
    if is_ditributed:
        init_op = tf.global_variables_initializer()
        sv = tf.train.Supervisor(
            is_chief=is_chief,
            logdir=prog_params["train_dir"],
            init_op=init_op,
            recovery_wait_secs=1,
            summary_op=None,
            global_step=model.global_step,
        )
        model.supervisor = sv
    else:
        # NOTE(review): the original indentation was lost; initialise and
        # restore are assumed to belong to the non-distributed branch only.
        # Confirm against the upstream file.
        model.initialize(sess)
        model.restore(sess, prog_params["train_dir"])

    # Override the learning rate if given on the command line
    # NOTE(review): assumed to apply in both modes - confirm.
    if prog_params["learn_rate"] is not None:
        model.set_learning_rate(sess, prog_params["learn_rate"])

    return sv, model, t_iterator, v_iterator
def setUpClass(cls):
    """Create the AcousticModel shared by the tests and store it on ``cls.model``.

    The model is built inside a ``tf.Session`` context; the session is closed
    when the ``with`` block exits, while ``cls.model`` keeps the reference it
    was constructed with.
    """
    with tf.Session() as session:
        # Positional constructor arguments, passed through unchanged.
        positional_args = (session, 2, 50, 0.8, 0.5, 3, 0.0003, 0.33, 5, 1800, 600, 120, False)
        cls.model = AcousticModel(
            *positional_args,
            forward_only=False,
            tensorboard_dir=None,
            tb_run_name=None,
            timeline_enabled=False,
            language='english'
        )
def build_training_rnn(sess, hyper_params, prog_params, train_set, test_set):
    """Build, initialise and restore an AcousticModel ready for training.

    Args:
        sess: TensorFlow session used to initialise the iterators and the
            model and to restore the checkpoint.
        hyper_params: Dict of model hyper-parameters (layer sizes, sequence
            lengths, dropout, learning rate, language, directories, ...).
        prog_params: Dict of program options; reads ``tb_name``, ``timeline``
            and ``learn_rate``.
        train_set: Training samples passed to ``model.build_dataset``.
        test_set: Validation samples; may be empty or ``None``.

    Returns:
        Tuple ``(model, t_iterator, v_iterator)``. ``v_iterator`` is ``None``
        when no test set is supplied.
    """
    model = AcousticModel(
        hyper_params["num_layers"],
        hyper_params["hidden_size"],
        hyper_params["batch_size"],
        hyper_params["max_input_seq_length"],
        hyper_params["max_target_seq_length"],
        hyper_params["input_dim"],
        hyper_params["batch_normalization"],
        language=hyper_params["language"],
    )

    # Create a Dataset from the train_set and the test_set
    train_dataset = model.build_dataset(
        train_set,
        hyper_params["batch_size"],
        hyper_params["max_input_seq_length"],
        hyper_params["max_target_seq_length"],
        hyper_params["signal_processing"],
    )

    v_iterator = None
    # The previous check was `test_set is []`, an identity comparison against
    # a brand-new list that can never be true; test for emptiness instead.
    if not test_set:
        t_iterator = model.add_dataset_input(train_dataset)
        sess.run(t_iterator.initializer)
    else:
        test_dataset = model.build_dataset(
            test_set,
            hyper_params["batch_size"],
            hyper_params["max_input_seq_length"],
            hyper_params["max_target_seq_length"],
            hyper_params["signal_processing"],
        )
        # Build the input stream from the different datasets
        t_iterator, v_iterator = model.add_datasets_input(train_dataset, test_dataset)
        sess.run(t_iterator.initializer)
        sess.run(v_iterator.initializer)

    # Create the model
    model.create_training_rnn(
        hyper_params["dropout_input_keep_prob"],
        hyper_params["dropout_output_keep_prob"],
        hyper_params["grad_clip"],
        hyper_params["learning_rate"],
        hyper_params["lr_decay_factor"],
        use_iterator=True,
    )
    model.add_tensorboard(sess, hyper_params["tensorboard_dir"], prog_params["tb_name"],
                          prog_params["timeline"])
    model.initialize(sess)
    model.restore(sess, hyper_params["checkpoint_dir"])

    # Override the learning rate if given on the command line
    if prog_params["learn_rate"] is not None:
        model.set_learning_rate(sess, prog_params["learn_rate"])

    return model, t_iterator, v_iterator