def decode_test(session, decode_op, network, dataset, label_type, rate=1.0,
                is_multitask=False):
    """Visualize label outputs.
    Args:
        session: session of training model
        decode_op: operation for decoding
        network: network to evaluate
        dataset: Dataset class
        label_type: phone39 or phone48 or phone61 or character
        rate: rate of evaluation data to use
        is_multitask: if True, evaluate the multitask model
    """
    batch_size = 1
    num_examples = dataset.data_num * rate
    iteration = int(num_examples / batch_size)
    if (num_examples / batch_size) != int(num_examples / batch_size):
        iteration += 1

    map_file_path_phone = '../evaluation/mapping_files/ctc/phone2num_' + \
        label_type[5:7] + '.txt'
    map_file_path_char = '../evaluation/mapping_files/ctc/char2num.txt'

    for step in range(iteration):
        # Create feed dictionary for next mini batch
        inputs, labels_true, seq_len, input_names = dataset.next_batch(
            batch_size=batch_size)

        # if is_multitask:
        #     if label_type == 'character':
        #         labels_true = labels_true[0]
        #     else:
        #         labels_true = labels_true[1]

        feed_dict = {
            network.inputs_pl: inputs,
            network.seq_len_pl: seq_len,
            network.keep_prob_input_pl: 1.0,
            network.keep_prob_hidden_pl: 1.0
        }

        # Visualize
        batch_size_each = len(labels_true)
        labels_pred_st = session.run(decode_op, feed_dict=feed_dict)
        labels_pred = sparsetensor2list(labels_pred_st, batch_size_each)

        for i_batch in range(batch_size_each):
            if label_type == 'character':
                print('-----wav: %s-----' % input_names[i_batch])
                print('True: %s' % num2char(
                    labels_true[i_batch], map_file_path_char))
                print('Pred: %s' % num2char(
                    labels_pred[i_batch], map_file_path_char))
            else:
                # Decode test (39 phones)
                print('-----wav: %s-----' % input_names[i_batch])
                print('True: %s' % num2phone(
                    labels_true[i_batch], map_file_path_phone))
                print('Pred: %s' % num2phone(
                    labels_pred[i_batch], map_file_path_phone))

def decode_test(session, decode_op, network, dataset, label_type,
                save_path=None):
    """Visualize label outputs of CTC model.
    Args:
        session: session of training model
        decode_op: operation for decoding
        network: network to evaluate
        dataset: An instance of a `Dataset` class
        label_type: string, phone39 or phone48 or phone61 or character
        save_path: path to save decoding results
    """
    # Batch size is expected to be 1
    iteration = dataset.data_num

    # Make data generator
    mini_batch = dataset.next_batch(batch_size=1)

    if label_type == 'character':
        map_file_path = '../metric/mapping_files/ctc/char2num.txt'
    else:
        map_file_path = '../metric/mapping_files/ctc/phone2num_' + \
            label_type[5:7] + '.txt'

    if save_path is not None:
        sys.stdout = open(join(network.model_dir, 'decode.txt'), 'w')

    for step in range(iteration):
        # Create feed dictionary for next mini batch
        inputs, labels_true_st, inputs_seq_len, input_names = mini_batch.__next__()

        feed_dict = {
            network.inputs: inputs,
            network.inputs_seq_len: inputs_seq_len,
            network.keep_prob_input: 1.0,
            network.keep_prob_hidden: 1.0
        }

        # Visualize
        labels_pred_st = session.run(decode_op, feed_dict=feed_dict)
        labels_true = sparsetensor2list(labels_true_st, batch_size=1)
        labels_pred = sparsetensor2list(labels_pred_st, batch_size=1)

        if label_type == 'character':
            print('----- wav: %s -----' % input_names[0])
            print('True: %s' % num2char(labels_true[0], map_file_path))
            print('Pred: %s' % num2char(labels_pred[0], map_file_path))
        else:
            print('----- wav: %s -----' % input_names[0])
            print('True: %s' % num2phone(labels_true[0], map_file_path))
            print('Pred: %s' % num2phone(labels_pred[0], map_file_path))

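
# A minimal sketch of the `sparsetensor2list` helper relied on above. This is
# an assumption about its behavior, not the repository's implementation: it
# takes the tf.SparseTensorValue returned by session.run (for the ground-truth
# labels or the CTC decode op) and rebuilds one flat label list per utterance.
def sparsetensor2list_sketch(labels_st, batch_size):
    # labels_st.indices has shape (num_labels, 2): (utterance index, position)
    # labels_st.values holds the label ids in the same order
    labels = [[] for _ in range(batch_size)]
    for (i_utt, _), value in zip(labels_st.indices, labels_st.values):
        labels[int(i_utt)].append(int(value))
    return labels
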
def check_reading(self, num_gpu, is_sorted):
    print('----- num_gpu: ' + str(num_gpu) +
          ', is_sorted: ' + str(is_sorted) + ' -----')

    batch_size = 64
    dataset = DataSet(data_type='train', label_type_second='phone61',
                      batch_size=batch_size,
                      num_stack=3, num_skip=3,
                      is_sorted=is_sorted, is_progressbar=True,
                      num_gpu=num_gpu)

    tf.reset_default_graph()
    with tf.Session().as_default() as sess:
        print('=> Reading mini-batch...')
        map_file_path_char = '../metric/mapping_files/ctc/char2num.txt'
        map_file_path_phone = '../metric/mapping_files/ctc/phone2num_61.txt'

        mini_batch = dataset.next_batch(session=sess)

        iter_per_epoch = int(dataset.data_num / (batch_size * num_gpu)) + 1
        for i in range(iter_per_epoch + 1):
            inputs, labels_char_st, labels_phone_st, inputs_seq_len, input_names = mini_batch.__next__()

            if num_gpu > 1:
                for inputs_gpu in inputs:
                    print(inputs_gpu.shape)
                labels_char_st = labels_char_st[0]
                labels_phone_st = labels_phone_st[0]

            labels_char = sparsetensor2list(labels_char_st,
                                            batch_size=len(inputs))
            labels_phone = sparsetensor2list(labels_phone_st,
                                             batch_size=len(inputs))

            if num_gpu == 1:
                for inputs_i, labels_i in zip(inputs, labels_char):
                    if len(inputs_i) < len(labels_i):
                        print(len(inputs_i))
                        print(len(labels_i))
                        raise ValueError

                for inputs_i, labels_i in zip(inputs, labels_phone):
                    if len(inputs_i) < len(labels_i):
                        print(len(inputs_i))
                        print(len(labels_i))
                        raise ValueError

            str_true_char = num2char(labels_char[0], map_file_path_char)
            str_true_char = re.sub(r'_', ' ', str_true_char)
            str_true_phone = num2phone(labels_phone[0], map_file_path_phone)

            print(str_true_char)
            print(str_true_phone)
            print('-----')

def test(self):
    dataset = DataSet(data_type='dev', label_type='character',
                      num_stack=3, num_skip=3,
                      is_sorted=True, is_progressbar=True)

    print('=> Reading mini-batch...')
    map_file_path = '../evaluation/mapping_files/ctc/char2num.txt'
    for i in tqdm(range(20000)):
        inputs, labels, seq_len, input_names = dataset.next_batch(
            batch_size=64)
        indices, values, dense_shape = list2sparsetensor(labels)
        str_true = num2char(labels[0], map_file_path)
        str_true = re.sub(r'_', ' ', str_true)
        print(str_true)

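
# A minimal sketch of the `list2sparsetensor` helper exercised by the test
# above (an assumption about its behavior, not the actual implementation): it
# flattens a batch of label lists into the (indices, values, dense_shape)
# triple that the sparse CTC label placeholders expect.
import numpy as np


def list2sparsetensor_sketch(labels):
    indices, values = [], []
    for i_utt, label_seq in enumerate(labels):
        for i_pos, label in enumerate(label_seq):
            indices.append([i_utt, i_pos])
            values.append(label)
    dense_shape = [len(labels), max(len(label_seq) for label_seq in labels)]
    return (np.array(indices, dtype=np.int64),
            np.array(values, dtype=np.int32),
            np.array(dense_shape, dtype=np.int64))
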
def do_eval_cer(session, decode_op, network, dataset, label_type,
                is_test=None, eval_batch_size=None, rate=1.0,
                is_progressbar=False, is_multitask=False, is_main=False):
    """Evaluate trained model by Character Error Rate.
    Args:
        session: session of training model
        decode_op: operation for decoding
        network: network to evaluate
        dataset: Dataset class
        label_type: character or kanji
        is_test: set to True when evaluating by the test set
        eval_batch_size: batch size on evaluation
        rate: rate of evaluation data to use
        is_progressbar: if True, visualize progressbar
        is_multitask: if True, evaluate the multitask model
        is_main: if True, evaluate the main task
    Return:
        cer_mean: mean character error rate
    """
    batch_size = network.batch_size if eval_batch_size is None else eval_batch_size

    num_examples = dataset.data_num * rate
    iteration = int(num_examples / batch_size)
    if (num_examples / batch_size) != int(num_examples / batch_size):
        iteration += 1
    cer_sum = 0

    if label_type == 'character':
        map_file_path = '../evaluation/mapping_files/ctc/char2num.txt'
    elif label_type == 'kanji':
        map_file_path = '../evaluation/mapping_files/ctc/kanji2num.txt'

    iterator = tqdm(range(iteration)) if is_progressbar else range(iteration)
    for step in iterator:
        # Create feed dictionary for next mini batch
        if not is_multitask:
            inputs, labels_true, seq_len, _ = dataset.next_batch(
                batch_size=batch_size)
        else:
            if is_main:
                inputs, labels_true, _, seq_len, _ = dataset.next_batch(
                    batch_size=batch_size)
            else:
                inputs, _, labels_true, seq_len, _ = dataset.next_batch(
                    batch_size=batch_size)

        feed_dict = {
            network.inputs_pl: inputs,
            network.seq_len_pl: seq_len,
            network.keep_prob_input_pl: 1.0,
            network.keep_prob_hidden_pl: 1.0
        }

        batch_size_each = len(labels_true)
        labels_pred_st = session.run(decode_op, feed_dict=feed_dict)
        labels_pred = sparsetensor2list(labels_pred_st, batch_size_each)

        for i_batch in range(batch_size_each):
            # Convert from list to string
            str_pred = num2char(labels_pred[i_batch], map_file_path)
            str_pred = re.sub(r'_', '', str_pred)

            # TODO: change in case of character
            if label_type == 'kanji' and is_test:
                str_true = labels_true[i_batch]
            else:
                str_true = num2char(labels_true[i_batch], map_file_path)
                str_true = re.sub(r'_', '', str_true)

            # Compute edit distance
            cer_each = Levenshtein.distance(str_pred, str_true) / len(
                list(str_true))
            cer_sum += cer_each

    cer_mean = cer_sum / dataset.data_num
    print(' CER: %f %%' % (cer_mean * 100))

    return cer_mean

def decode_test(session, decode_op, network, dataset, label_type, is_test,
                eval_batch_size=None, rate=1.0):
    """Visualize label outputs.
    Args:
        session: session of training model
        decode_op: operation for decoding
        network: network to evaluate
        dataset: Dataset class
        label_type: phone or character or kanji
        is_test: set to True when evaluating by the test set
        eval_batch_size: batch size on evaluation
        rate: rate of evaluation data to use
    """
    batch_size = 1
    num_examples = dataset.data_num * rate
    iteration = int(num_examples / batch_size)
    if (num_examples / batch_size) != int(num_examples / batch_size):
        iteration += 1

    map_file_path_phone = '../evaluation/mapping_files/ctc/phone2num.txt'
    if label_type == 'character':
        map_file_path = '../evaluation/mapping_files/ctc/char2num.txt'
    elif label_type == 'kanji':
        map_file_path = '../evaluation/mapping_files/ctc/kanji2num.txt'

    for step in range(iteration):
        # Create feed dictionary for next mini batch
        inputs, labels_true, seq_len, input_names = dataset.next_batch(
            batch_size=batch_size)

        feed_dict = {
            network.inputs_pl: inputs,
            network.seq_len_pl: seq_len,
            network.keep_prob_input_pl: 1.0,
            network.keep_prob_hidden_pl: 1.0
        }

        # Visualize
        batch_size_each = len(labels_true)
        labels_pred_st = session.run(decode_op, feed_dict=feed_dict)
        labels_pred = sparsetensor2list(labels_pred_st, batch_size_each)

        for i_batch in range(batch_size_each):
            if label_type in ['character', 'kanji']:
                # Convert from list to string
                str_pred = num2char(labels_pred[i_batch], map_file_path)
                if label_type == 'kanji' and is_test:
                    str_true = labels_true[i_batch]
                else:
                    str_true = num2char(labels_true[i_batch], map_file_path)
                print('-----wav: %s-----' % input_names[i_batch])
                print('True: %s' % str_true)
                print('Pred: %s' % str_pred)
            elif label_type == 'phone':
                print('-----wav: %s-----' % input_names[i_batch])
                print('True: %s' % num2phone(labels_true[i_batch],
                                             map_file_path_phone))
                print('Pred: %s' % num2phone(labels_pred[i_batch],
                                             map_file_path_phone))

def do_eval_cer(session, decode_op, network, dataset, label_type,
                is_test=None, eval_batch_size=None, is_progressbar=False,
                is_multitask=False, is_main=False):
    """Evaluate trained model by Character Error Rate.
    Args:
        session: session of training model
        decode_op: operation for decoding
        network: network to evaluate
        dataset: An instance of `Dataset` class
        label_type: string, character or kanji
        is_test: set to True when evaluating by the test set
        eval_batch_size: int, the batch size when evaluating the model
        is_progressbar: if True, visualize progressbar
        is_multitask: if True, evaluate the multitask model
        is_main: if True, evaluate the main task
    Return:
        cer_mean: An average of CER
    """
    if eval_batch_size is None:
        batch_size = network.batch_size
    else:
        batch_size = eval_batch_size

    num_examples = dataset.data_num
    iteration = int(num_examples / batch_size)
    if (num_examples / batch_size) != int(num_examples / batch_size):
        iteration += 1
    cer_sum = 0

    # Make data generator
    mini_batch = dataset.next_batch(batch_size=batch_size)

    if label_type == 'character':
        map_file_path = '../metric/mapping_files/ctc/char2num.txt'
    elif label_type == 'kanji':
        map_file_path = '../metric/mapping_files/ctc/kanji2num.txt'

    for step in wrap_iterator(range(iteration), is_progressbar):
        # Create feed dictionary for next mini batch
        if not is_multitask:
            inputs, labels_true_st, inputs_seq_len, _ = mini_batch.__next__()
        else:
            if is_main:
                inputs, labels_true_st, _, inputs_seq_len, _ = mini_batch.__next__()
            else:
                inputs, _, labels_true_st, inputs_seq_len, _ = mini_batch.__next__()

        feed_dict = {
            network.inputs: inputs,
            network.inputs_seq_len: inputs_seq_len,
            network.keep_prob_input: 1.0,
            network.keep_prob_hidden: 1.0
        }

        batch_size_each = len(inputs_seq_len)
        labels_pred_st = session.run(decode_op, feed_dict=feed_dict)
        labels_true = sparsetensor2list(labels_true_st, batch_size_each)
        labels_pred = sparsetensor2list(labels_pred_st, batch_size_each)

        for i_batch in range(batch_size_each):
            # Convert from list to string
            str_pred = num2char(labels_pred[i_batch], map_file_path)

            # TODO: change in case of character
            if label_type == 'kanji' and is_test:
                str_true = ''.join(labels_true[i_batch])
                # NOTE: for kanji, the test-set labels are stored as-is
            else:
                str_true = num2char(labels_true[i_batch], map_file_path)

            # Remove silence (_) labels
            str_true = re.sub(r'[_]+', "", str_true)
            str_pred = re.sub(r'[_]+', "", str_pred)

            # Compute edit distance
            cer_each = Levenshtein.distance(str_pred, str_true) / len(
                list(str_true))
            cer_sum += cer_each

    cer_mean = cer_sum / dataset.data_num

    return cer_mean

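
# Worked example of the per-utterance CER computed above: the edit distance
# between hypothesis and reference, normalized by the reference length
# (the strings below are illustrative only).
import Levenshtein

str_true = 'kyouwaiitenki'
str_pred = 'kyouhaiitenki'  # one substitution error
cer = Levenshtein.distance(str_pred, str_true) / len(str_true)
print('CER: %f %%' % (cer * 100))  # 1 / 13, about 7.7 %
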
def do_eval_cer(session, decode_op, network, dataset, eval_batch_size=None,
                is_progressbar=False, is_multitask=False):
    """Evaluate trained model by Character Error Rate.
    Args:
        session: session of training model
        decode_op: operation for decoding
        network: network to evaluate
        dataset: An instance of a `Dataset` class
        eval_batch_size: int, batch size when evaluating the model
        is_progressbar: if True, visualize the progressbar
        is_multitask: if True, evaluate the multitask model
    Return:
        cer_mean: An average of CER
    """
    if eval_batch_size is not None:
        batch_size = eval_batch_size
    else:
        batch_size = dataset.batch_size

    # Make data generator
    mini_batch = dataset.next_batch(batch_size=batch_size)

    num_examples = dataset.data_num
    iteration = int(num_examples / batch_size)
    if (num_examples / batch_size) != int(num_examples / batch_size):
        iteration += 1
    cer_sum = 0

    map_file_path = '../metric/mapping_files/attention/char2num.txt'

    for step in wrap_iterator(range(iteration), is_progressbar):
        # Create feed dictionary for next mini batch
        if not is_multitask:
            inputs, labels_true, inputs_seq_len, _, _ = mini_batch.__next__()
        else:
            inputs, labels_true, _, inputs_seq_len, _, _ = mini_batch.__next__()

        feed_dict = {
            network.inputs: inputs,
            network.inputs_seq_len: inputs_seq_len,
            network.keep_prob_input: 1.0,
            network.keep_prob_hidden: 1.0
        }

        batch_size_each = len(inputs_seq_len)
        predicted_ids = session.run(decode_op, feed_dict=feed_dict)

        for i_batch in range(batch_size_each):
            # Convert from list to string
            str_true = num2char(labels_true[i_batch], map_file_path)
            str_pred = num2char(predicted_ids[i_batch], map_file_path)

            # Remove silence (_) and <, > labels
            str_true = re.sub(r'[_<>]+', "", str_true)
            str_pred = re.sub(r'[_]+', "", str_pred)

            print(str_true)
            print(str_pred)

            # Compute edit distance
            cer_each = Levenshtein.distance(str_pred, str_true) / len(
                list(str_true))
            cer_sum += cer_each

    cer_mean = cer_sum / dataset.data_num

    return cer_mean

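
# A minimal sketch of the `num2char` helper used throughout these evaluation
# functions. The mapping-file format (one "token index" pair per line) is an
# assumption; the repository's char2num files may differ.
def num2char_sketch(index_list, map_file_path):
    index2char = {}
    with open(map_file_path, 'r') as f:
        for line in f:
            char, index = line.strip().split()
            index2char[int(index)] = char
    return ''.join(index2char[int(i)] for i in index_list)
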
def do_train(network, optimizer, learning_rate, batch_size, epoch_num,
             label_type, num_stack, num_skip, train_data_size):
    """Run training.
    Args:
        network: network to train
        optimizer: string, the name of optimizer. ex.) adam, rmsprop
        learning_rate: initial learning rate
        batch_size: size of mini batch
        epoch_num: epoch num to train
        label_type: phone or character or kanji
        num_stack: int, the number of frames to stack
        num_skip: int, the number of frames to skip
        train_data_size: default or large
    """
    # Load dataset
    train_data = DataSet(data_type='train', label_type=label_type,
                         train_data_size=train_data_size,
                         num_stack=num_stack, num_skip=num_skip,
                         is_sorted=True)
    dev_data = DataSet(data_type='dev', label_type=label_type,
                       train_data_size=train_data_size,
                       num_stack=num_stack, num_skip=num_skip,
                       is_sorted=False)
    eval1_data = DataSet(data_type='eval1', label_type=label_type,
                         train_data_size=train_data_size,
                         num_stack=num_stack, num_skip=num_skip,
                         is_sorted=False)
    eval2_data = DataSet(data_type='eval2', label_type=label_type,
                         train_data_size=train_data_size,
                         num_stack=num_stack, num_skip=num_skip,
                         is_sorted=False)
    eval3_data = DataSet(data_type='eval3', label_type=label_type,
                         train_data_size=train_data_size,
                         num_stack=num_stack, num_skip=num_skip,
                         is_sorted=False)

    # Tell TensorFlow that the model will be built into the default graph
    with tf.Graph().as_default():

        # Define model
        network.define()
        # NOTE: define model under tf.Graph()

        # Add to the graph each operation
        loss_op = network.loss()
        train_op = network.train(optimizer=optimizer,
                                 learning_rate_init=learning_rate,
                                 is_scheduled=False)
        decode_op = network.decoder(decode_type='beam_search',
                                    beam_width=20)
        per_op = network.ler(decode_op)

        # Build the summary tensor based on the TensorFlow collection of
        # summaries
        summary_train = tf.summary.merge(network.summaries_train)
        summary_dev = tf.summary.merge(network.summaries_dev)

        # Add the variable initializer operation
        init_op = tf.global_variables_initializer()

        # Create a saver for writing training checkpoints
        saver = tf.train.Saver(max_to_keep=None)

        # Count total parameters
        parameters_dict, total_parameters = count_total_parameters(
            tf.trainable_variables())
        for parameter_name in sorted(parameters_dict.keys()):
            print("%s %d" % (parameter_name, parameters_dict[parameter_name]))
        print("Total %d variables, %s M parameters" %
              (len(parameters_dict.keys()),
               "{:,}".format(total_parameters / 1000000)))

        csv_steps = []
        csv_train_loss = []
        csv_dev_loss = []

        # Create a session for running operation on the graph
        with tf.Session() as sess:

            # Instantiate a SummaryWriter to output summaries and the graph
            summary_writer = tf.summary.FileWriter(network.model_dir,
                                                   sess.graph)

            # Initialize parameters
            sess.run(init_op)

            # Train model
            iter_per_epoch = int(train_data.data_num / batch_size)
            if (train_data.data_num / batch_size) != int(
                    train_data.data_num / batch_size):
                iter_per_epoch += 1
            max_steps = iter_per_epoch * epoch_num
            start_time_train = time.time()
            start_time_epoch = time.time()
            start_time_step = time.time()
            error_best = 1
            for step in range(max_steps):

                # Create feed dictionary for next mini batch (train)
                inputs, labels, seq_len, _ = train_data.next_batch(
                    batch_size=batch_size)
                indices, values, dense_shape = list2sparsetensor(labels)
                feed_dict_train = {
                    network.inputs_pl: inputs,
                    network.label_indices_pl: indices,
                    network.label_values_pl: values,
                    network.label_shape_pl: dense_shape,
                    network.seq_len_pl: seq_len,
                    network.keep_prob_input_pl: network.dropout_ratio_input,
                    network.keep_prob_hidden_pl: network.dropout_ratio_hidden,
                    network.lr_pl: learning_rate
                }

                # Create feed dictionary for next mini batch (dev)
                inputs, labels, seq_len, _ = dev_data.next_batch(
                    batch_size=batch_size)
                indices, values, dense_shape = list2sparsetensor(labels)
                feed_dict_dev = {
                    network.inputs_pl: inputs,
                    network.label_indices_pl: indices,
                    network.label_values_pl: values,
                    network.label_shape_pl: dense_shape,
                    network.seq_len_pl: seq_len,
                    network.keep_prob_input_pl: network.dropout_ratio_input,
                    network.keep_prob_hidden_pl: network.dropout_ratio_hidden
                }

                # Update parameters & compute loss
                _, loss_train = sess.run([train_op, loss_op],
                                         feed_dict=feed_dict_train)
                loss_dev = sess.run(loss_op, feed_dict=feed_dict_dev)
                csv_steps.append(step)
                csv_train_loss.append(loss_train)
                csv_dev_loss.append(loss_dev)

                if (step + 1) % 100 == 0:

                    # Change feed dict for evaluation
                    feed_dict_train[network.keep_prob_input_pl] = 1.0
                    feed_dict_train[network.keep_prob_hidden_pl] = 1.0
                    feed_dict_dev[network.keep_prob_input_pl] = 1.0
                    feed_dict_dev[network.keep_prob_hidden_pl] = 1.0

                    # Compute accuracy & update event file
                    ler_train, summary_str_train = sess.run(
                        [per_op, summary_train], feed_dict=feed_dict_train)
                    ler_dev, summary_str_dev, labels_st = sess.run(
                        [per_op, summary_dev, decode_op],
                        feed_dict=feed_dict_dev)
                    summary_writer.add_summary(summary_str_train, step + 1)
                    summary_writer.add_summary(summary_str_dev, step + 1)
                    summary_writer.flush()

                    # Decode (fall back to dummy labels if the batch could
                    # not be converted)
                    try:
                        labels_pred = sparsetensor2list(labels_st, batch_size)
                    except:
                        labels_pred = [[0] * batch_size]

                    duration_step = time.time() - start_time_step
                    print('Step %d: loss = %.3f (%.3f) / ler = %.4f (%.4f) (%.3f min)' %
                          (step + 1, loss_train, loss_dev, ler_train, ler_dev,
                           duration_step / 60))
                    # print('Step %d: loss = %.3f / ler = %.4f (%.3f min)' %
                    #       (step + 1, loss_train, ler_train, duration_step / 60))

                    if label_type == 'kanji':
                        map_file_path = '../evaluation/mapping_files/ctc/kanji2num.txt'
                        print('True: %s' % num2char(labels[-1], map_file_path))
                        print('Pred: %s' % num2char(labels_pred[-1], map_file_path))
                    elif label_type == 'character':
                        map_file_path = '../evaluation/mapping_files/ctc/char2num.txt'
                        print('True: %s' % num2char(labels[-1], map_file_path))
                        print('Pred: %s' % num2char(labels_pred[-1], map_file_path))
                    elif label_type == 'phone':
                        map_file_path = '../evaluation/mapping_files/ctc/phone2num.txt'
                        print('True: %s' % num2phone(labels[-1], map_file_path))
                        print('Pred: %s' % num2phone(labels_pred[-1], map_file_path))

                    sys.stdout.flush()
                    start_time_step = time.time()

                # Save checkpoint and evaluate model per epoch
                if (step + 1) % iter_per_epoch == 0 or (step + 1) == max_steps:
                    duration_epoch = time.time() - start_time_epoch
                    epoch = (step + 1) // iter_per_epoch
                    print('-----EPOCH:%d (%.3f min)-----' %
                          (epoch, duration_epoch / 60))

                    # Save model (check point)
                    checkpoint_file = join(network.model_dir, 'model.ckpt')
                    save_path = saver.save(sess, checkpoint_file,
                                           global_step=epoch)
                    print("Model saved in file: %s" % save_path)

                    start_time_eval = time.time()
                    if label_type in ['character', 'kanji']:
                        print('■Dev Evaluation:■')
                        error_epoch = do_eval_cer(session=sess,
                                                  decode_op=decode_op,
                                                  network=network,
                                                  dataset=dev_data,
                                                  label_type=label_type,
                                                  eval_batch_size=batch_size)
                        if error_epoch < error_best:
                            error_best = error_epoch
                            print('■■■ ↑Best Score (CER)↑ ■■■')

                        print('■eval1 Evaluation:■')
                        cer_eval1 = do_eval_cer(session=sess,
                                                decode_op=decode_op,
                                                network=network,
                                                dataset=eval1_data,
                                                label_type=label_type,
                                                is_test=True,
                                                eval_batch_size=batch_size)
                        print('■eval2 Evaluation:■')
                        cer_eval2 = do_eval_cer(session=sess,
                                                decode_op=decode_op,
                                                network=network,
                                                dataset=eval2_data,
                                                label_type=label_type,
                                                is_test=True,
                                                eval_batch_size=batch_size)
                        print('■eval3 Evaluation:■')
                        cer_eval3 = do_eval_cer(session=sess,
                                                decode_op=decode_op,
                                                network=network,
                                                dataset=eval3_data,
                                                label_type=label_type,
                                                is_test=True,
                                                eval_batch_size=batch_size)
                        cer_mean = (cer_eval1 + cer_eval2 + cer_eval3) / 3.
                        print('■Mean:■')
                        print(' CER: %f %%' % (cer_mean * 100))
                    else:
                        print('■Dev Evaluation:■')
                        error_epoch = do_eval_per(session=sess,
                                                  per_op=per_op,
                                                  network=network,
                                                  dataset=dev_data,
                                                  eval_batch_size=batch_size)
                        if error_epoch < error_best:
                            error_best = error_epoch
                            print('■■■ ↑Best Score (PER)↑ ■■■')

                        print('■eval1 Evaluation:■')
                        per_eval1 = do_eval_per(session=sess,
                                                per_op=per_op,
                                                network=network,
                                                dataset=eval1_data,
                                                eval_batch_size=batch_size)
                        print('■eval2 Evaluation:■')
                        per_eval2 = do_eval_per(session=sess,
                                                per_op=per_op,
                                                network=network,
                                                dataset=eval2_data,
                                                eval_batch_size=batch_size)
                        print('■eval3 Evaluation:■')
                        per_eval3 = do_eval_per(session=sess,
                                                per_op=per_op,
                                                network=network,
                                                dataset=eval3_data,
                                                eval_batch_size=batch_size)
                        per_mean = (per_eval1 + per_eval2 + per_eval3) / 3.
                        print('■Mean:■')
                        print(' PER: %f %%' % (per_mean * 100))

                    duration_eval = time.time() - start_time_eval
                    print('Evaluation time: %.3f min' % (duration_eval / 60))

                    start_time_epoch = time.time()
                    start_time_step = time.time()

            duration_train = time.time() - start_time_train
            print('Total time: %.3f hour' % (duration_train / 3600))

            # Save train & dev loss
            save_loss(csv_steps, csv_train_loss, csv_dev_loss,
                      save_path=network.model_dir)

            # Training was finished correctly
            with open(join(network.model_dir, 'complete.txt'), 'w') as f:
                f.write('')

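
# A minimal sketch of the `count_total_parameters` helper called in do_train
# (an assumption about its behavior, not the repository's implementation): it
# reports the number of elements per trainable variable and the total count.
def count_total_parameters_sketch(trainable_variables):
    parameters_dict = {}
    total_parameters = 0
    for var in trainable_variables:
        num_params = 1
        for dim in var.get_shape():
            num_params *= int(dim)
        parameters_dict[var.name] = num_params
        total_parameters += num_params
    return parameters_dict, total_parameters
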