def __init__(self, data_type, label_type, batch_size, eos_index,
             is_sorted=True, is_progressbar=False, num_gpu=1):
    """Load the whole dataset (inputs and attention labels) into memory.

    Args:
        data_type: string, train or dev or test
        label_type: string, phone39 or phone48 or phone61 or character
        batch_size: int, the size of mini-batch (per GPU)
        eos_index: int, the index of <EOS> class
        is_sorted: if True, sort dataset by frame num. It is only stored
            here; the paths built below are always ordered by frame num.
        is_progressbar: if True, visualize progressbar
        num_gpu: int, the number of GPUs. The stored batch size is
            batch_size * num_gpu.
    Raises:
        ValueError: if data_type is not one of train, dev, test
    """
    if data_type not in ['train', 'dev', 'test']:
        raise ValueError('data_type is "train" or "dev" or "test".')

    def _pack(arrays):
        """Stack equally-shaped arrays, else keep them in an object array.

        Utterances usually differ in length, and recent NumPy versions
        refuse to build an array from such ragged sequences implicitly.
        """
        if len({np.shape(a) for a in arrays}) <= 1:
            return np.array(arrays)
        packed = np.empty(len(arrays), dtype=object)
        for i, a in enumerate(arrays):
            packed[i] = a
        return packed

    self.data_type = data_type
    self.label_type = label_type
    self.batch_size = batch_size * num_gpu
    self.eos_index = eos_index
    self.is_sorted = is_sorted
    self.is_progressbar = is_progressbar
    self.num_gpu = num_gpu

    self.input_size = 123
    input_path = join('/n/sd8/inaguma/corpus/timit/dataset/inputs/',
                      data_type)
    label_path = join(
        '/n/sd8/inaguma/corpus/timit/dataset/labels/attention/',
        label_type, data_type)

    # Load the frame number dictionary
    with open(join(input_path, 'frame_num.pickle'), 'rb') as f:
        self.frame_num_dict = pickle.load(f)

    # Sort paths to input & label by frame num
    sorted_names = [name for name, _ in sorted(
        self.frame_num_dict.items(), key=lambda x: x[1])]
    self.input_paths = np.array(
        [join(input_path, name + '.npy') for name in sorted_names])
    self.label_paths = np.array(
        [join(label_path, name + '.npy') for name in sorted_names])
    self.data_num = len(self.input_paths)

    # Load all dataset in advance
    print('=> Loading ' + data_type + ' dataset (' + label_type + ')...')
    input_list, label_list = [], []
    for i in wrap_iterator(range(self.data_num), self.is_progressbar):
        input_list.append(np.load(self.input_paths[i]))
        label_list.append(np.load(self.label_paths[i]))
    self.input_list = _pack(input_list)
    self.label_list = _pack(label_list)

    # Indices of the utterances not yet consumed
    self.rest = set(range(self.data_num))
def do_eval_per(session, decode_op, per_op, network, dataset, label_type,
                eos_index, eval_batch_size=None, is_progressbar=False):
    """Evaluate trained model by Phone Error Rate.

    Both hypotheses and references are mapped to the 39-phone set before
    the edit distance is computed.

    Args:
        session: session of training model
        decode_op: operation for decoding
        per_op: operation for computing phone error rate. Kept for
            interface compatibility; it is not run because evaluation is
            always done on 39 phones.
        network: network to evaluate
        dataset: An instance of a `Dataset' class
        label_type: string, phone39 or phone48 or phone61 (the label type
            the model was trained with)
        eos_index: int, the index of <EOS> class
        eval_batch_size: int, the batch size when evaluating the model
        is_progressbar: if True, visualize the progressbar
    Returns:
        per_global: An average of PER
    """
    if eval_batch_size is None:
        batch_size = dataset.batch_size
    else:
        batch_size = eval_batch_size

    train_label_type = label_type
    data_label_type = dataset.label_type

    # Number of mini-batches needed to see every example once (ceiling)
    num_examples = dataset.data_num
    iteration = -(-num_examples // batch_size)
    per_global = 0

    # Make data generator
    mini_batch = dataset.next_batch(batch_size=batch_size)

    map_dir = '../metrics/mapping_files/attention/'
    train_phone2num_map_file_path = map_dir + 'phone2num_' + \
        train_label_type[5:7] + '.txt'
    # References are indexed with the dataset's own label set, which may
    # differ from the one used for training
    data_phone2num_map_file_path = map_dir + 'phone2num_' + \
        data_label_type[5:7] + '.txt'
    phone2num_39_map_file_path = map_dir + 'phone2num_39.txt'
    phone2phone_map_file_path = '../metrics/mapping_files/phone2phone.txt'

    for step in wrap_iterator(range(iteration), is_progressbar):
        # Create feed dictionary for next mini-batch
        inputs, att_labels_true, _, inputs_seq_len, _, _ = next(mini_batch)

        feed_dict = {
            network.inputs: inputs,
            network.inputs_seq_len: inputs_seq_len,
            network.keep_prob_input: 1.0,
            network.keep_prob_hidden: 1.0
        }

        batch_size_each = len(inputs_seq_len)

        # Evaluate by 39 phones
        predicted_ids = session.run(decode_op, feed_dict=feed_dict)

        predicted_ids_phone39 = []
        labels_true_phone39 = []
        for i_batch in range(batch_size_each):
            # Hypothesis: num -> phone strings -> 39 phones -> num
            phone_pred_list = num2phone(
                predicted_ids[i_batch],
                train_phone2num_map_file_path).split(' ')
            phone_pred_list = map_to_39phone(phone_pred_list,
                                             train_label_type,
                                             phone2phone_map_file_path)
            phone_pred_list = phone2num(phone_pred_list,
                                        phone2num_39_map_file_path)
            predicted_ids_phone39.append(phone_pred_list)

            if data_label_type != 'phone39':
                # Reference: num -> phone strings -> 39 phones -> num
                phone_true_list = num2phone(
                    att_labels_true[i_batch],
                    data_phone2num_map_file_path).split(' ')
                phone_true_list = map_to_39phone(phone_true_list,
                                                 data_label_type,
                                                 phone2phone_map_file_path)
                phone_true_list = phone2num(phone_true_list,
                                            phone2num_39_map_file_path)
                labels_true_phone39.append(phone_true_list)
            else:
                # Already 39 phones: use the references as they are
                labels_true_phone39 = att_labels_true

        # Compute edit distance
        labels_true_st = list2sparsetensor(labels_true_phone39,
                                           padded_value=eos_index)
        labels_pred_st = list2sparsetensor(predicted_ids_phone39,
                                           padded_value=eos_index)
        per_local = compute_edit_distance(session,
                                          labels_true_st,
                                          labels_pred_st)
        # Weight by the batch size so the final value is a per-example mean
        per_global += per_local * batch_size_each

    per_global /= dataset.data_num

    return per_global
def do_eval_cer(session, decode_op, network, dataset, eval_batch_size=None,
                is_progressbar=False):
    """Evaluate trained model by Character Error Rate.

    Args:
        session: session of training model
        decode_op: operation for decoding
        network: network to evaluate
        dataset: An instance of a `Dataset` class
        eval_batch_size: int, batch size when evaluating the model
        is_progressbar: if True, visualize the progressbar
    Return:
        cer_mean: An average of CER
    """
    if eval_batch_size is None:
        batch_size = dataset.batch_size
    else:
        batch_size = eval_batch_size

    # Make data generator
    mini_batch = dataset.next_batch(batch_size=batch_size)

    # Number of mini-batches needed to see every example once (ceiling)
    num_examples = dataset.data_num
    iteration = -(-num_examples // batch_size)
    cer_sum = 0

    map_file_path = '../metrics/mapping_files/attention/char2num.txt'

    # Silence (_) and punctuation to remove. The hyphen is escaped so that
    # it is a literal character and not a range from "'" to "?", which
    # would also delete digits and other symbols.
    ignored_chars = re.compile(r'[_<>,.\'\-?!]+')

    for step in wrap_iterator(range(iteration), is_progressbar):
        # Create feed dictionary for next mini-batch
        inputs, att_labels_true, _, inputs_seq_len, _, _ = next(mini_batch)

        feed_dict = {
            network.inputs: inputs,
            network.inputs_seq_len: inputs_seq_len,
            network.keep_prob_input: 1.0,
            network.keep_prob_hidden: 1.0
        }

        batch_size_each = len(inputs_seq_len)

        predicted_ids = session.run(decode_op, feed_dict=feed_dict)
        for i_batch in range(batch_size_each):
            # Convert from list to string
            str_true = num2char(att_labels_true[i_batch], map_file_path)
            str_pred = num2char(predicted_ids[i_batch], map_file_path)

            # Remove silence(_) labels
            str_true = ignored_chars.sub('', str_true)
            str_pred = ignored_chars.sub('', str_pred)

            # Compute edit distance, normalized by the reference length.
            # An empty reference is normalized by 1 instead of raising
            # ZeroDivisionError.
            cer_sum += Levenshtein.distance(
                str_pred, str_true) / max(len(str_true), 1)

    cer_mean = cer_sum / dataset.data_num

    return cer_mean
def __init__(self, data_type, train_data_size, label_type_main,
             label_type_sub, batch_size, num_stack=None, num_skip=None,
             is_sorted=True, is_progressbar=False, num_gpu=1, is_gpu=True):
    """Collect the paths to the inputs and to both label sets.

    Only paths are gathered here; no feature or label file is loaded.

    Args:
        data_type: string, train or dev or eval1 or eval2 or eval3
        train_data_size: string, default or large
        label_type_main: string, character or kanji
        label_type_sub: string, character or phone
        batch_size: int, the size of mini-batch
        num_stack: int, the number of frames to stack
        num_skip: int, the number of frames to skip
        is_sorted: if True, sort dataset by frame num
        is_progressbar: if True, visualize progressbar
        num_gpu: int, the stored batch size is batch_size * num_gpu
        is_gpu: bool, selects which of the two dataset roots is used
    Raises:
        ValueError: if data_type is not a supported data set name
    """
    eval_sets = ['eval1', 'eval2', 'eval3']
    if data_type not in ['train', 'dev'] + eval_sets:
        raise ValueError(
            'data_type is "train" or "dev", "eval1", "eval2", "eval3".')

    self.data_type = data_type
    self.train_data_size = train_data_size
    self.label_type_main = label_type_main
    self.label_type_sub = label_type_sub
    self.batch_size = batch_size * num_gpu
    self.num_stack = num_stack
    self.num_skip = num_skip
    self.is_sorted = is_sorted
    self.is_progressbar = is_progressbar
    self.num_gpu = num_gpu
    self.input_size = 123

    # Dataset roots differ between the GPU and the CPU setting
    if is_gpu:
        input_root = '/data/inaguma/csj/inputs'
        label_root = '/data/inaguma/csj/labels/ctc/'
    else:
        input_root = '/n/sd8/inaguma/corpus/csj/dataset/inputs'
        label_root = '/n/sd8/inaguma/corpus/csj/dataset/labels/ctc/'
    input_path = join(input_root, train_data_size, data_type)
    label_main_path = join(label_root, train_data_size, label_type_main,
                           data_type)
    label_sub_path = join(label_root, train_data_size, label_type_sub,
                          data_type)

    # Load the frame number dictionary
    with open(join(input_path, 'frame_num.pickle'), 'rb') as f:
        self.frame_num_dict = pickle.load(f)

    # Sort paths to input & label by frame num
    print('=> loading paths to dataset...')
    utterances = sorted(self.frame_num_dict.items(),
                        key=lambda item: item[1])
    input_paths = []
    label_main_paths = []
    label_sub_paths = []
    for input_name, _ in wrap_iterator(utterances, self.is_progressbar):
        # Files are grouped in one directory per speaker
        speaker_name = input_name.split('_')[0]
        file_name = input_name + '.npy'
        input_paths.append(join(input_path, speaker_name, file_name))
        label_main_paths.append(
            join(label_main_path, speaker_name, file_name))
        label_sub_paths.append(
            join(label_sub_path, speaker_name, file_name))
    self.input_paths = np.array(input_paths)
    self.label_main_paths = np.array(label_main_paths)
    self.label_sub_paths = np.array(label_sub_paths)
    self.data_num = len(self.input_paths)

    # Stacking frames multiplies the input dimension
    if self.num_stack is not None and self.num_skip is not None:
        self.input_size = self.input_size * num_stack
    # NOTE: Not load dataset yet

    self.rest = set(range(0, self.data_num, 1))

    self.is_test = bool(data_type in eval_sets and
                        label_type_sub != 'phone')
def do_eval_per(session, decode_op, per_op, network, dataset, label_type,
                eos_index, eval_batch_size=None, is_progressbar=False,
                is_multitask=False):
    """Evaluate trained model by Phone Error Rate.

    Both hypotheses and references are mapped to the 39-phone set before
    the edit distance is computed.

    Args:
        session: session of training model
        decode_op: operation for decoding
        per_op: operation for computing phone error rate. Kept for
            interface compatibility; it is not run by this function.
        network: network to evaluate
        dataset: An instance of a `Dataset' class
        label_type: string, phone39 or phone48 or phone61 (the label type
            the model was trained with)
        eos_index: int, the index of <EOS> class
        eval_batch_size: int, the batch size when evaluating the model
        is_progressbar: if True, visualize the progressbar
        is_multitask: if True, evaluate the multitask model
    Returns:
        per_mean: An average of PER
    """
    if eval_batch_size is None:
        batch_size = dataset.batch_size
    else:
        batch_size = eval_batch_size

    train_label_type = label_type
    eval_label_type = dataset.label_type

    # Number of mini-batches needed to see every example once (ceiling)
    num_examples = dataset.data_num
    iteration = -(-num_examples // batch_size)
    per_mean = 0

    # Make data generator
    mini_batch = dataset.next_batch(batch_size=batch_size)

    map_dir = '../metrics/mapping_files/ctc/'
    train_phone2num_map_file_path = map_dir + train_label_type + \
        '_to_num.txt'
    # The references use the label set of the evaluated dataset, which
    # may differ from the one the model was trained with
    eval_phone2num_map_file_path = map_dir + eval_label_type + \
        '_to_num.txt'
    phone2num_39_map_file_path = map_dir + 'phone39_to_num.txt'
    phone2phone_map_file_path = '../metrics/mapping_files/phone2phone.txt'

    for step in wrap_iterator(range(iteration), is_progressbar):
        # Create feed dictionary for next mini-batch
        if not is_multitask:
            inputs, labels_true, inputs_seq_len, _, _ = next(mini_batch)
        else:
            # The multitask generator yields the main-task labels first;
            # the phone labels are the second label set
            inputs, _, labels_true, inputs_seq_len, _, _ = next(mini_batch)

        feed_dict = {
            network.inputs: inputs,
            network.inputs_seq_len: inputs_seq_len,
            network.keep_prob_input: 1.0,
            network.keep_prob_hidden: 1.0
        }

        batch_size_each = len(inputs_seq_len)

        # Evaluate by 39 phones
        predicted_ids = session.run(decode_op, feed_dict=feed_dict)

        labels_pred_mapped, labels_true_mapped = [], []
        for i_batch in range(batch_size_each):
            ###############
            # Hypothesis
            ###############
            # Convert from num to phone (-> list of phone strings)
            phone_pred_list = num2phone(
                predicted_ids[i_batch],
                train_phone2num_map_file_path).split(' ')

            # Mapping to 39 phones (-> list of phone strings)
            phone_pred_list = map_to_39phone(phone_pred_list,
                                             train_label_type,
                                             phone2phone_map_file_path)

            # Convert from phone to num (-> list of phone indices)
            phone_pred_list = phone2num(phone_pred_list,
                                        phone2num_39_map_file_path)
            labels_pred_mapped.append(phone_pred_list)

            ###############
            # Reference
            ###############
            # Convert from num to phone (-> list of phone strings)
            phone_true_list = num2phone(
                labels_true[i_batch],
                eval_phone2num_map_file_path).split(' ')

            # Mapping to 39 phones (-> list of phone strings)
            phone_true_list = map_to_39phone(phone_true_list,
                                             eval_label_type,
                                             phone2phone_map_file_path)

            # Convert from phone to num (-> list of phone indices)
            phone_true_list = phone2num(phone_true_list,
                                        phone2num_39_map_file_path)
            labels_true_mapped.append(phone_true_list)

        # Compute edit distance
        labels_true_st = list2sparsetensor(labels_true_mapped,
                                           padded_value=eos_index)
        labels_pred_st = list2sparsetensor(labels_pred_mapped,
                                           padded_value=eos_index)
        per_each = compute_edit_distance(session,
                                         labels_true_st,
                                         labels_pred_st)
        # Weight by the batch size so the final value is a per-example mean
        per_mean += per_each * batch_size_each

    per_mean /= dataset.data_num

    return per_mean
def main():
    """Print the Character Error Rate of Julius results on eval1-3.

    The Julius outputs (phone sequences) are converted to kana with the
    mapping file and compared with the reference transcripts of the
    dataset. Utterances without a Julius result are skipped and do not
    enter the average.
    """
    # Make mapping dictionary from phone to kana
    phone2kana_dict = {}
    with open('../metrics/mapping_files/kana2phone.txt', 'r') as f:
        for line in f:
            kana, phone_seq = line.strip().split('+')
            phone = phone_seq.replace(' ', '')
            # Keep the first kana registered for a phone sequence
            if phone in phone2kana_dict:
                continue
            phone2kana_dict[phone] = kana
            # Long vowel variant
            phone2kana_dict[phone + ':'] = kana + 'ー'

    # Julius Results
    for data_type in ['eval1', 'eval2', 'eval3']:
        results_paths = glob(
            '/home/lab5/inaguma/asru2017/csj_results_0710_kana/' +
            data_type + '/*.kana')

        result_dict = {}
        for path in results_paths:
            with codecs.open(path, 'r', 'euc_jp') as f:
                file_name = ''
                for line in f:
                    line = line.strip()
                    if 'wav' in line:
                        # <speaker>/<utterance>.wav -> <speaker>_<utterance>
                        file_name = '_'.join(line.split('/')[-2:])
                        # The dot is escaped so only the extension matches
                        file_name = re.sub(r'\.wav', '', file_name)
                    else:
                        # Recognition result of the last seen wav file,
                        # without the short pause (sp) labels
                        result_dict[file_name] = re.sub('sp', '', line)

        label_type = 'kana'
        dataset = Dataset(data_type=data_type,
                          label_type=label_type,
                          batch_size=1,
                          train_data_size='large',
                          is_sorted=False,
                          is_progressbar=True,
                          is_gpu=False)

        num_examples = dataset.data_num
        cer_sum = 0
        num_evaluated = 0

        mini_batch = dataset.next_batch(batch_size=1)

        for step in wrap_iterator(range(num_examples), False):
            # Create feed dictionary for next mini batch
            _, labels_true, _, input_names = next(mini_batch)

            if input_names[0] not in result_dict:
                continue

            # Drop the empty tokens produced by consecutive spaces
            output = [phone
                      for phone in result_dict[input_names[0]].split(' ')
                      if phone != '']

            str_pred = ''.join([phone2kana_dict[phone] for phone in output])

            # Debug output for two specific utterances
            if input_names[0] in ['A03M0106_0057', 'A03M0016_0014']:
                print(str_pred)
                print(labels_true[0])
                print('-----')

            # Remove silence(_) & noise(NZ) labels
            str_true = re.sub(r'[_NZー・]+', "", labels_true[0])
            str_pred = re.sub(r'[_NZー・]+', "", str_pred)

            # Compute edit distance, normalized by the reference length.
            # An empty reference is normalized by 1 instead of raising
            # ZeroDivisionError.
            cer_each = Levenshtein.distance(
                str_pred, str_true) / max(len(str_true), 1)

            cer_sum += cer_each
            num_evaluated += 1

        # Average over the utterances that were actually scored; skipped
        # ones must not count as error-free
        print('CER (' + data_type + '): %f' %
              (cer_sum / max(num_evaluated, 1)))
def stack_frame(input_list, input_paths, frame_num_dict, num_stack, num_skip,
                is_progressbar=False):
    """Stack & skip some frames. This implementation is based on
       https://arxiv.org/abs/1507.06947.
           Sak, Haşim, et al.
           "Fast and accurate recurrent neural network acoustic models
            for speech recognition."
           arXiv preprint arXiv:1507.06947 (2015).

    Output frame k of an utterance is the concatenation of the input
    frames k * num_skip, ..., k * num_skip + num_stack - 1. Positions that
    would lie beyond the last input frame are left as zeros.

    Args:
        input_list: list of input data, one 2-D array (frame, feature)
            per utterance
        input_paths: list of paths to input data
        frame_num_dict:
            key => utterance name (basename of the input path without
                   its extension)
            value => the number of frames
        num_stack: int, the number of frames to stack
        num_skip: int, the number of frames to skip
        is_progressbar: if True, visualize progressbar
    Returns:
        stacked_input_list: array of frame-stacked inputs. Utterances of
            different lengths are returned as a 1-D object array.
    Raises:
        ValueError: if num_skip is smaller than 1 or larger than num_stack
    """
    if num_skip < 1:
        raise ValueError('num_skip must be a positive integer.')
    if num_stack < num_skip:
        raise ValueError('num_skip must be less than or equal to num_stack.')

    # Nothing to stack (also avoids indexing input_list[0] below)
    if len(input_list) == 0:
        return np.array([])

    input_size = input_list[0].shape[1]
    utt_num = len(input_paths)

    stacked_input_list = []
    for i_utt in wrap_iterator(range(utt_num), is_progressbar):
        # Per utterance
        input_name = basename(input_paths[i_utt]).split('.')[0]
        frame_num = frame_num_dict[input_name]
        # ceil(frame_num / num_skip)
        frame_num_decimated = int(-(-frame_num // num_skip))

        frames = input_list[i_utt]
        stacked_frames = np.zeros(
            (frame_num_decimated, input_size * num_stack))
        for i_out in range(frame_num_decimated):
            # Window of at most num_stack frames; it is shorter at the
            # end of the utterance, which leaves the tail zero-padded
            start = i_out * num_skip
            window = frames[start:start + num_stack]
            for i_stack, frame in enumerate(window):
                stacked_frames[i_out][input_size * i_stack:
                                      input_size * (i_stack + 1)] = frame

        stacked_input_list.append(stacked_frames)

    # Utterances usually differ in length. Recent NumPy versions refuse to
    # build an array from such ragged sequences implicitly, so the object
    # array is created explicitly in that case.
    if len({frames.shape for frames in stacked_input_list}) <= 1:
        return np.array(stacked_input_list)
    packed = np.empty(len(stacked_input_list), dtype=object)
    for i_utt, frames in enumerate(stacked_input_list):
        packed[i_utt] = frames
    return packed
def __init__(self, data_type, train_data_size, label_type, batch_size,
             eos_index, is_sorted=True, is_progressbar=False, num_gpu=1):
    """Collect the paths to the inputs and to the attention labels.

    Only paths are gathered here; no feature or label file is loaded.

    Args:
        data_type: string, train, dev, eval1, eval2, eval3
        train_data_size: string, default or large
        label_type: string, phone or character or kanji
        batch_size: int, the size of mini-batch
        eos_index: int, the index of <EOS> class
        is_sorted: if True, sort dataset by frame num
        is_progressbar: if True, visualize progressbar
        num_gpu: int, the stored batch size is batch_size * num_gpu
    Raises:
        ValueError: if data_type is not a supported data set name
    """
    eval_sets = ['eval1', 'eval2', 'eval3']
    if data_type not in ['train', 'dev'] + eval_sets:
        raise ValueError(
            'data_type is "train" or "dev", "eval1" "eval2" "eval3".')

    self.data_type = data_type
    self.train_data_size = train_data_size
    self.label_type = label_type
    self.batch_size = batch_size * num_gpu
    self.eos_index = eos_index
    self.is_sorted = is_sorted
    self.is_progressbar = is_progressbar
    self.num_gpu = num_gpu
    self.input_size = 123

    input_path = join('/data/inaguma/csj/inputs',
                      train_data_size, data_type)
    label_path = join('/data/inaguma/csj/labels/attention/',
                      train_data_size, label_type, data_type)

    # Load the frame number dictionary
    with open(join(input_path, 'frame_num.pickle'), 'rb') as f:
        self.frame_num_dict = pickle.load(f)

    # Sort paths to input & label by frame num
    print('=> Loading paths to dataset...')
    utterances = sorted(self.frame_num_dict.items(),
                        key=lambda item: item[1])
    input_paths = []
    label_paths = []
    for input_name, _ in wrap_iterator(utterances, self.is_progressbar):
        # Files are grouped in one directory per speaker
        speaker_name = input_name.split('_')[0]
        file_name = input_name + '.npy'
        input_paths.append(join(input_path, speaker_name, file_name))
        label_paths.append(join(label_path, speaker_name, file_name))
    self.input_paths = np.array(input_paths)
    self.label_paths = np.array(label_paths)
    self.data_num = len(self.input_paths)

    self.rest = set(range(0, self.data_num, 1))

    self.is_test = bool(data_type in eval_sets and label_type != 'phone')
def __init__(self, data_type, label_type_main, label_type_sub, batch_size,
             num_stack=None, num_skip=None, is_sorted=True,
             is_progressbar=False, num_gpu=1):
    """Load the whole dataset (inputs and both label sets) into memory.

    Args:
        data_type: string, train or dev or test
        label_type_main: accepted for interface compatibility but not
            used; the main task always uses the character labels
        label_type_sub: string, phone39 or phone48 or phone61
        batch_size: int, the size of mini-batch
        num_stack: int, the number of frames to stack
        num_skip: int, the number of frames to skip
        is_sorted: if True, sort dataset by frame num. It is only stored
            here; the paths built below are always ordered by frame num.
        is_progressbar: if True, visualize progressbar
        num_gpu: int, the number of GPUs. The stored batch size is
            batch_size * num_gpu.
    Raises:
        ValueError: if data_type is not one of train, dev, test
    """
    if data_type not in ['train', 'dev', 'test']:
        raise ValueError('data_type is "train" or "dev" or "test".')

    def _pack(arrays):
        """Stack equally-shaped arrays, else keep them in an object array.

        Utterances usually differ in length, and recent NumPy versions
        refuse to build an array from such ragged sequences implicitly.
        """
        if len({np.shape(a) for a in arrays}) <= 1:
            return np.array(arrays)
        packed = np.empty(len(arrays), dtype=object)
        for i, a in enumerate(arrays):
            packed[i] = a
        return packed

    self.data_type = data_type
    self.label_type_main = 'character'
    self.label_type_sub = label_type_sub
    self.batch_size = batch_size * num_gpu
    self.num_stack = num_stack
    self.num_skip = num_skip
    self.is_sorted = is_sorted
    self.is_progressbar = is_progressbar
    self.num_gpu = num_gpu

    self.input_size = 123
    input_path = join('/n/sd8/inaguma/corpus/timit/dataset/inputs/',
                      data_type)
    label_main_path = join(
        '/n/sd8/inaguma/corpus/timit/dataset/labels/ctc/character/',
        data_type)
    label_sub_path = join(
        '/n/sd8/inaguma/corpus/timit/dataset/labels/ctc/',
        label_type_sub, data_type)

    # Load the frame number dictionary
    with open(join(input_path, 'frame_num.pickle'), 'rb') as f:
        self.frame_num_dict = pickle.load(f)

    # Sort paths to input & label by frame num. All three path lists are
    # derived from the same names, so they always have the same length.
    sorted_names = [name for name, _ in sorted(
        self.frame_num_dict.items(), key=lambda x: x[1])]
    self.input_paths = np.array(
        [join(input_path, name + '.npy') for name in sorted_names])
    self.label_main_paths = np.array(
        [join(label_main_path, name + '.npy') for name in sorted_names])
    self.label_sub_paths = np.array(
        [join(label_sub_path, name + '.npy') for name in sorted_names])
    self.data_num = len(self.input_paths)

    # Load all dataset in advance
    print('=> Loading ' + data_type +
          ' dataset (' + label_type_sub + ')...')
    input_list, label_main_list, label_sub_list = [], [], []
    for i in wrap_iterator(range(self.data_num), self.is_progressbar):
        input_list.append(np.load(self.input_paths[i]))
        label_main_list.append(np.load(self.label_main_paths[i]))
        label_sub_list.append(np.load(self.label_sub_paths[i]))
    self.input_list = _pack(input_list)
    self.label_main_list = _pack(label_main_list)
    self.label_sub_list = _pack(label_sub_list)

    # Frame stacking
    if (num_stack is not None) and (num_skip is not None):
        print('=> Stacking frames...')
        self.input_list = stack_frame(self.input_list,
                                      self.input_paths,
                                      self.frame_num_dict,
                                      num_stack,
                                      num_skip,
                                      is_progressbar)
        # Stacking frames multiplies the input dimension
        self.input_size = self.input_size * num_stack

    # Indices of the utterances not yet consumed
    self.rest = set(range(self.data_num))
def do_eval_cer(session, decode_op, network, dataset, label_type,
                is_test=None, eval_batch_size=None, is_progressbar=False,
                is_multitask=False, is_main=False):
    """Evaluate trained model by Character Error Rate.

    Args:
        session: session of training model
        decode_op: operation for decoding
        network: network to evaluate
        dataset: An instance of `Dataset` class
        label_type: string, kanji or kana or phone
        is_test: bool, set to True when evaluating by the test set
        eval_batch_size: int, the batch size when evaluating the model
        is_progressbar: if True, visualize progressbar
        is_multitask: if True, evaluate the multitask model
        is_main: if True, evaluate the main task
    Return:
        cer_mean: An average of CER
    Raises:
        ValueError: if label_type is not kanji, kana or phone
    """
    map_file_paths = {
        'kanji': '../metrics/mapping_files/ctc/kanji2num.txt',
        'kana': '../metrics/mapping_files/ctc/kana2num.txt',
        'phone': '../metrics/mapping_files/ctc/phone2num.txt',
    }
    if label_type not in map_file_paths:
        raise ValueError('label_type is "kanji" or "kana" or "phone".')
    map_file_path = map_file_paths[label_type]

    if eval_batch_size is None:
        batch_size = dataset.batch_size
    else:
        batch_size = eval_batch_size

    # Number of mini-batches needed to see every example once (ceiling)
    num_examples = dataset.data_num
    iteration = -(-num_examples // batch_size)
    cer_sum = 0

    # Make data generator
    mini_batch = dataset.next_batch(batch_size=batch_size)

    for step in wrap_iterator(range(iteration), is_progressbar):
        # Create feed dictionary for next mini batch
        if not is_multitask:
            inputs, labels_true, inputs_seq_len, _ = next(mini_batch)
        elif is_main:
            # Main-task labels come first in the multitask batch
            inputs, labels_true, _, inputs_seq_len, _ = next(mini_batch)
        else:
            inputs, _, labels_true, inputs_seq_len, _ = next(mini_batch)

        feed_dict = {
            network.inputs: inputs,
            network.inputs_seq_len: inputs_seq_len,
            network.keep_prob_input: 1.0,
            network.keep_prob_hidden: 1.0
        }

        batch_size_each = len(inputs_seq_len)

        labels_pred_st = session.run(decode_op, feed_dict=feed_dict)
        labels_pred = sparsetensor2list(labels_pred_st, batch_size_each)
        for i_batch in range(batch_size_each):
            # Convert from list to string
            if label_type != 'phone' and is_test:
                str_true = ''.join(labels_true[i_batch])
                # NOTE: for kanji and kana, the labels of the test data
                # are stored as they are (not converted to indices)
            else:
                str_true = num2char(labels_true[i_batch], map_file_path)
            str_pred = num2char(labels_pred[i_batch], map_file_path)

            # Remove silence(_) & noise(NZ) labels
            str_true = re.sub(r'[_NZー]+', "", str_true)
            str_pred = re.sub(r'[_NZー]+', "", str_pred)

            # Compute edit distance, normalized by the reference length.
            # An empty reference is normalized by 1 instead of raising
            # ZeroDivisionError.
            cer_sum += Levenshtein.distance(
                str_pred, str_true) / max(len(str_true), 1)

    cer_mean = cer_sum / dataset.data_num

    return cer_mean