def do_eval_per(session, per_op, network, dataset, eval_batch_size=None,
                is_progressbar=False, is_multitask=False):
    """Evaluate trained model by Phone Error Rate.

    Enough mini-batches are drawn from `dataset.next_batch` to cover
    `dataset.data_num` examples. `per_op` is run on every mini-batch, its
    result is weighted by the number of entries in `inputs_seq_len`, and
    the weighted sum is finally divided by `dataset.data_num`.

    Args:
        session: session of training model
        per_op: operation for computing phone error rate
        network: network to evaluate. Supplies the placeholders that are
            fed and, when eval_batch_size is None, the batch size.
        dataset: An instance of a `Dataset' class
        eval_batch_size: int, the batch size when evaluating the model
        is_progressbar: if True, visualize progressbar
        is_multitask: if True, evaluate the multitask model (each
            mini-batch is then unpacked into five elements, not four)
    Returns:
        per_global: An average of PER
    """
    batch_size = (network.batch_size if eval_batch_size is None
                  else eval_batch_size)

    # Round the number of mini-batches up so that a trailing, smaller
    # mini-batch is evaluated as well
    batch_ratio = dataset.data_num / batch_size
    iteration = int(batch_ratio)
    if batch_ratio != iteration:
        iteration += 1

    per_global = 0

    # Make data generator
    batch_generator = dataset.next_batch(batch_size=batch_size)

    for _ in wrap_iterator(range(iteration), is_progressbar):
        batch = next(batch_generator)

        # The label element is unpacked but not fed to per_op here
        if is_multitask:
            inputs, _, _, inputs_seq_len, _ = batch
        else:
            inputs, _, inputs_seq_len, _ = batch

        # Keep probabilities are fed as 1.0 during evaluation
        feed_dict = {
            network.inputs: inputs,
            network.inputs_seq_len: inputs_seq_len,
            network.keep_prob_input: 1.0,
            network.keep_prob_hidden: 1.0
        }

        batch_size_each = len(inputs_seq_len)
        per_global += session.run(per_op,
                                  feed_dict=feed_dict) * batch_size_each

    per_global /= dataset.data_num

    return per_global
def __init__(self, data_type, label_type, batch_size, eos_index,
             max_epoch=None, splice=1, num_stack=1, num_skip=1,
             shuffle=False, sort_utt=False, sort_stop_epoch=None,
             progressbar=False):
    """A class for loading dataset.

    All input and label arrays are loaded into memory here and the inputs
    are passed through `stack_frame` before the constructor returns.

    Args:
        data_type (string): train or dev or test
        label_type (string): phone39 or phone48 or phone61 or
            character or character_capital_divide
        batch_size (int): the size of mini-batch
        eos_index (int): the index of <EOS> class. Also stored as
            `padded_value`.
        max_epoch (int, optional): the max epoch. None means infinite loop.
        splice (int, optional): frames to splice. Default is 1 frame.
        num_stack (int, optional): the number of frames to stack
        num_skip (int, optional): the number of frames to skip
        shuffle (bool, optional): if True, shuffle utterances. This is
            disabled when sort_utt is True.
        sort_utt (bool, optional): if True, sort all utterances by the
            number of frames and utterances in each mini-batch are
            shuffled. Otherwise, shuffle utterances. In this constructor
            it selects the sort key: frame count when True, utterance
            name when False.
        sort_stop_epoch (int, optional): After sort_stop_epoch, training
            will revert back to a random order
        progressbar (bool, optional): if True, visualize progressbar
    Raises:
        TypeError: if data_type or label_type is not one of the values
            listed above
    """
    if data_type not in ('train', 'dev', 'test'):
        raise TypeError('data_type must be "train" or "dev" or "test".')
    if label_type not in ('phone39', 'phone48', 'phone61', 'character',
                          'character_capital_divide'):
        raise TypeError(
            'label_type must be "phone39" or "phone48" or "phone61" or '
            '"character" or "character_capital_divide".')

    super(Dataset, self).__init__()

    self.data_type = data_type
    self.label_type = label_type
    self.batch_size = batch_size
    self.max_epoch = max_epoch
    self.eos_index = eos_index
    self.splice = splice
    self.num_stack = num_stack
    self.num_skip = num_skip
    self.shuffle = shuffle
    self.sort_utt = sort_utt
    self.sort_stop_epoch = sort_stop_epoch
    self.progressbar = progressbar
    self.padded_value = eos_index

    input_path = join(
        '/n/sd8/inaguma/corpus/timit/dataset/inputs/htk/speaker',
        data_type)
    label_path = join(
        '/n/sd8/inaguma/corpus/timit/dataset/labels/attention',
        label_type, data_type)

    # Load the frame number dictionary
    with open(join(input_path, 'frame_num.pickle'), 'rb') as f:
        self.frame_num_dict = pickle.load(f)

    # Order the utterances: item[1] is the frame count, item[0] the name
    sort_column = 1 if sort_utt else 0
    sorted_items = sorted(self.frame_num_dict.items(),
                          key=lambda item: item[sort_column])
    utt_names = [utt_name for utt_name, _ in sorted_items]
    self.input_paths = np.array(
        [join(input_path, utt_name + '.npy') for utt_name in utt_names])
    self.label_paths = np.array(
        [join(label_path, utt_name + '.npy') for utt_name in utt_names])

    # Load all dataset in advance
    print('=> Loading dataset (%s, %s)...' % (data_type, label_type))
    loaded_inputs, loaded_labels = [], []
    for i_utt in wrap_iterator(range(len(self.input_paths)),
                               self.progressbar):
        loaded_inputs.append(np.load(self.input_paths[i_utt]))
        loaded_labels.append(np.load(self.label_paths[i_utt]))
    self.input_list = np.array(loaded_inputs)
    self.label_list = np.array(loaded_labels)

    # Frame stacking
    print('=> Stacking frames...')
    self.input_list = stack_frame(self.input_list,
                                  self.input_paths,
                                  self.frame_num_dict,
                                  num_stack,
                                  num_skip,
                                  progressbar)

    # Indices of the utterances, one per loaded path
    self.rest = set(range(len(self.input_paths)))
def __init__(self, data_type, label_type_second, batch_size,
             num_stack=None, num_skip=None, is_sorted=True,
             is_progressbar=False, num_gpu=1):
    """Load inputs, character labels and phone labels into memory.

    Args:
        data_type: string, train or dev or test
        label_type_second: string, phone39 or phone48 or phone61
        batch_size: int, the size of mini-batch. The stored
            `self.batch_size` is `batch_size * num_gpu`.
        num_stack: int, the number of frames to stack
        num_skip: int, the number of frames to skip. Frame stacking is
            applied only when both num_stack and num_skip are given.
        is_sorted: if True, sort dataset by frame num. Only stored here;
            the paths are sorted by frame num regardless of this flag.
        is_progressbar: if True, visualize progressbar
        num_gpu: int, the number of GPUs; multiplies the batch size
    Raises:
        ValueError: if data_type is not train, dev or test
    """
    if data_type not in ('train', 'dev', 'test'):
        raise ValueError('data_type is "train" or "dev" or "test".')

    self.data_type = data_type
    self.label_type_second = label_type_second
    self.batch_size = batch_size * num_gpu
    self.num_stack = num_stack
    self.num_skip = num_skip
    self.is_sorted = is_sorted
    self.is_progressbar = is_progressbar
    self.num_gpu = num_gpu

    self.input_size = 123
    self.dataset_char_path = join(
        '/n/sd8/inaguma/corpus/timit/dataset/ctc/character', data_type)
    self.dataset_phone_path = join(
        '/n/sd8/inaguma/corpus/timit/dataset/ctc/',
        label_type_second, data_type)

    # Load the frame number dictionary
    self.frame_num_dict_path = join(
        self.dataset_char_path, 'frame_num.pickle')
    with open(self.frame_num_dict_path, 'rb') as f:
        self.frame_num_dict = pickle.load(f)

    # Sort paths to input & label by frame num
    self.frame_num_tuple_sorted = sorted(
        self.frame_num_dict.items(), key=lambda item: item[1])
    utt_names = [utt_name for utt_name, _ in self.frame_num_tuple_sorted]
    input_paths = [
        join(self.dataset_char_path, 'input', utt_name + '.npy')
        for utt_name in utt_names]
    label_char_paths = [
        join(self.dataset_char_path, 'label', utt_name + '.npy')
        for utt_name in utt_names]
    label_phone_paths = [
        join(self.dataset_phone_path, 'label', utt_name + '.npy')
        for utt_name in utt_names]
    if len(label_char_paths) != len(label_phone_paths):
        raise ValueError(
            'The numbers of labels between ' +
            'character and phone are not same.')
    self.input_paths = np.array(input_paths)
    self.label_char_paths = np.array(label_char_paths)
    self.label_phone_paths = np.array(label_phone_paths)
    self.data_num = len(self.input_paths)

    # Load all dataset in advance
    print(''.join(['=> Loading ', data_type, ' dataset (',
                   label_type_second, ')...']))
    loaded_inputs, loaded_chars, loaded_phones = [], [], []
    for i_utt in wrap_iterator(range(self.data_num), self.is_progressbar):
        loaded_inputs.append(np.load(self.input_paths[i_utt]))
        loaded_chars.append(np.load(self.label_char_paths[i_utt]))
        loaded_phones.append(np.load(self.label_phone_paths[i_utt]))
    self.input_list = np.array(loaded_inputs)
    self.label_char_list = np.array(loaded_chars)
    self.label_phone_list = np.array(loaded_phones)

    # Frame stacking
    if num_stack is not None and num_skip is not None:
        print('=> Stacking frames...')
        self.input_list = np.array(stack_frame(self.input_list,
                                               self.input_paths,
                                               self.frame_num_dict,
                                               num_stack,
                                               num_skip,
                                               is_progressbar))
        # Each stacked frame concatenates num_stack original frames
        self.input_size *= num_stack

    self.rest = set(range(self.data_num))
def stack_frame(input_list, num_stack, num_skip, progressbar=False):
    """Stack & skip some frames.

    This implementation is based on https://arxiv.org/abs/1507.06947.
        Sak, Haşim, et al.
        "Fast and accurate recurrent neural network acoustic models for
         speech recognition."
        arXiv preprint arXiv:1507.06947 (2015).

    For every utterance, windows of up to `num_stack` consecutive frames
    are concatenated along the feature axis and the window advances by
    `num_skip` frames, giving ceil(frame_num / num_skip) output frames.
    Windows that run past the end of the utterance are left zero-padded.

    Args:
        input_list (list): list of input data; each element must expose
            a 2-D `.shape` of (frame_num, input_size)
        num_stack (int): the number of frames to stack
        num_skip (int): the number of frames to skip
        progressbar (bool, optional): if True, visualize progressbar
    Returns:
        input_list_new (np.ndarray): frame-stacked inputs. `input_list`
            itself is returned untouched when num_stack == num_skip == 1.
    Raises:
        ValueError: if num_stack < num_skip
    """
    # Nothing to do only when BOTH settings are 1 (the original compared
    # num_stack twice, which let num_stack=1 with num_skip>1 slip through
    # without any validation).
    if num_stack == 1 and num_skip == 1:
        return input_list

    if num_stack < num_skip:
        raise ValueError('num_skip must be less than num_stack.')

    input_list_new = []
    for i_batch in wrap_iterator(range(len(input_list)), progressbar):
        frames = input_list[i_batch]
        frame_num, input_size = frames.shape
        # int() keeps the value usable as an array dimension even where
        # math.ceil returns a float
        frame_num_new = int(math.ceil(frame_num / num_skip))

        stacked_frames = np.zeros((frame_num_new, input_size * num_stack))
        stack_count = 0  # next row of stacked_frames to fill
        stack = []
        for t, frame_t in enumerate(frames):
            if t == len(frames) - 1:
                # Final frame: keep emitting (possibly partial) windows
                # until every output row has been produced
                stack.append(frame_t)

                while stack_count != frame_num_new:
                    # Concatenate stacked frames
                    for i_stack, stacked in enumerate(stack):
                        stacked_frames[stack_count][
                            input_size * i_stack:
                            input_size * (i_stack + 1)] = stacked
                    stack_count += 1

                    # Delete some frames to skip (at most what is left)
                    del stack[:num_skip]

            elif len(stack) < num_stack:
                # First & middle frames: stack until the window is full
                stack.append(frame_t)

                if len(stack) == num_stack:
                    # Concatenate stacked frames
                    for i_stack, stacked in enumerate(stack):
                        stacked_frames[stack_count][
                            input_size * i_stack:
                            input_size * (i_stack + 1)] = stacked
                    stack_count += 1

                    # Delete some frames to skip
                    del stack[:num_skip]

        input_list_new.append(stacked_frames)

    # NOTE(review): utterances of different lengths make this a ragged
    # conversion; confirm the NumPy version in use still accepts that.
    return np.array(input_list_new)
def do_eval_cer(session, decode_op, network, dataset, label_type,
                is_test=None, eval_batch_size=None, is_progressbar=False,
                is_multitask=False, is_main=False):
    """Evaluate trained model by Character Error Rate.

    Every utterance is decoded with `decode_op`, both the hypothesis and
    the reference are converted to strings, '_' labels are removed, and
    the Levenshtein distance divided by the reference length is summed.
    The sum is finally divided by `dataset.data_num`.

    Args:
        session: session of training model
        decode_op: operation for decoding
        network: network to evaluate. Supplies the placeholders that are
            fed and, when eval_batch_size is None, the batch size.
        dataset: An instance of `Dataset` class
        label_type: string, character or kanji
        is_test: set to True when evaluating by the test set
        eval_batch_size: int, the batch size when evaluating the model
        is_progressbar: if True, visualize progressbar
        is_multitask: if True, evaluate the multitask model
        is_main: if True, evaluate the main task
    Return:
        cer_mean: An average of CER
    Raises:
        ValueError: if label_type is neither "character" nor "kanji"
    """
    # Validate label_type first: the original left map_file_path unbound
    # for any other value and only failed later inside the decoding loop.
    map_file_paths = {
        'character': '../metric/mapping_files/ctc/char2num.txt',
        'kanji': '../metric/mapping_files/ctc/kanji2num.txt',
    }
    if label_type not in map_file_paths:
        raise ValueError('label_type must be "character" or "kanji".')
    map_file_path = map_file_paths[label_type]

    if eval_batch_size is None:
        batch_size = network.batch_size
    else:
        batch_size = eval_batch_size

    # Round up so that a trailing, smaller mini-batch is evaluated too
    num_examples = dataset.data_num
    iteration = int(num_examples / batch_size)
    if (num_examples / batch_size) != iteration:
        iteration += 1
    cer_sum = 0

    # Make data generator
    mini_batch = dataset.next_batch(batch_size=batch_size)

    for _ in wrap_iterator(range(iteration), is_progressbar):
        # Create feed dictionary for next mini batch
        batch = next(mini_batch)
        if not is_multitask:
            inputs, labels_true_st, inputs_seq_len, _ = batch
        elif is_main:
            # Main-task labels are the second element of the batch
            inputs, labels_true_st, _, inputs_seq_len, _ = batch
        else:
            # Second-task labels are the third element of the batch
            inputs, _, labels_true_st, inputs_seq_len, _ = batch

        feed_dict = {
            network.inputs: inputs,
            network.inputs_seq_len: inputs_seq_len,
            network.keep_prob_input: 1.0,
            network.keep_prob_hidden: 1.0
        }

        batch_size_each = len(inputs_seq_len)
        labels_pred_st = session.run(decode_op, feed_dict=feed_dict)
        labels_true = sparsetensor2list(labels_true_st, batch_size_each)
        labels_pred = sparsetensor2list(labels_pred_st, batch_size_each)
        for i_batch in range(batch_size_each):

            # Convert from list to string
            str_pred = num2char(labels_pred[i_batch], map_file_path)
            # TODO: change in case of character
            if label_type == 'kanji' and is_test:
                # NOTE: for kanji, the test-set labels are stored as they
                # are, so they are joined without an index lookup
                str_true = ''.join(labels_true[i_batch])
            else:
                str_true = num2char(labels_true[i_batch], map_file_path)

            # Remove silence(_) labels
            str_true = re.sub(r'[_]+', "", str_true)
            str_pred = re.sub(r'[_]+', "", str_pred)

            # Compute edit distance, normalized by the reference length
            # NOTE(review): an empty reference raises ZeroDivisionError
            cer_each = Levenshtein.distance(
                str_pred, str_true) / len(str_true)
            cer_sum += cer_each

    cer_mean = cer_sum / dataset.data_num

    return cer_mean
def __init__(self, data_type, train_data_size, label_type_main,
             label_type_second, batch_size, num_stack=None, num_skip=None,
             is_sorted=True, is_progressbar=False, num_gpu=1):
    """Collect the paths to the inputs and to both label sets.

    Only paths are prepared here; no input or label array is loaded.

    Args:
        data_type: string, train or dev or eval1 or eval2 or eval3
        train_data_size: string, default or large
        label_type_main: string, character or kanji
        label_type_second: string, character or phone
        batch_size: int, the size of mini-batch. The stored
            `self.batch_size` is `batch_size * num_gpu`.
        num_stack: int, the number of frames to stack
        num_skip: int, the number of frames to skip. When both num_stack
            and num_skip are given, `input_size` is multiplied by
            num_stack.
        is_sorted: if True, sort dataset by frame num. Only stored here;
            the paths are sorted by frame num regardless of this flag.
        is_progressbar: if True, visualize progressbar
        num_gpu: int, the number of GPUs; multiplies the batch size
    Raises:
        ValueError: if data_type is not one of the values listed above
    """
    if data_type not in ('train', 'dev', 'eval1', 'eval2', 'eval3'):
        raise ValueError(
            'data_type is "train" or "dev", "eval1", "eval2", "eval3".')

    self.data_type = data_type
    self.train_data_size = train_data_size
    self.label_type_main = label_type_main
    self.label_type_second = label_type_second
    self.batch_size = batch_size * num_gpu
    self.num_stack = num_stack
    self.num_skip = num_skip
    self.is_sorted = is_sorted
    self.is_progressbar = is_progressbar
    self.num_gpu = num_gpu

    self.input_size = 123
    corpus_root = '/n/sd8/inaguma/corpus/csj/dataset/monolog/ctc/'
    self.dataset_main_path = join(
        corpus_root, label_type_main, train_data_size, data_type)
    self.dataset_second_path = join(
        corpus_root, label_type_second, train_data_size, data_type)

    # Load the frame number dictionary
    self.frame_num_dict_path = join(self.dataset_main_path,
                                    'frame_num.pickle')
    with open(self.frame_num_dict_path, 'rb') as f:
        self.frame_num_dict = pickle.load(f)

    # Sort paths to input & label by frame num
    print('=> loading paths to dataset...')
    self.frame_num_tuple_sorted = sorted(self.frame_num_dict.items(),
                                         key=lambda item: item[1])
    input_paths, label_main_paths, label_second_paths = [], [], []
    for utt_name, _ in wrap_iterator(self.frame_num_tuple_sorted,
                                     self.is_progressbar):
        # Files live in a directory named by the text before the first '_'
        speaker_name = utt_name.split('_')[0]
        file_name = utt_name + '.npy'
        input_paths.append(join(
            self.dataset_main_path, 'input', speaker_name, file_name))
        label_main_paths.append(join(
            self.dataset_main_path, 'label', speaker_name, file_name))
        label_second_paths.append(join(
            self.dataset_second_path, 'label', speaker_name, file_name))
    self.input_paths = np.array(input_paths)
    self.label_main_paths = np.array(label_main_paths)
    self.label_second_paths = np.array(label_second_paths)
    self.data_num = len(self.input_paths)

    if num_stack is not None and num_skip is not None:
        self.input_size *= num_stack
    # NOTE: Not load dataset yet

    self.rest = set(range(self.data_num))
def do_eval_per(session, decode_op, per_op, network, dataset,
                train_label_type, eval_batch_size=None,
                is_progressbar=False, is_multitask=False):
    """Evaluate trained model by Phone Error Rate.

    Predictions (and, unless the dataset is already labelled with
    phone39, the references too) are mapped to the 39-phone set before
    the edit distance is computed. Each mini-batch result is weighted by
    the number of utterances in it and the total is divided by
    `dataset.data_num`.

    Args:
        session: session of training model
        decode_op: operation for decoding
        per_op: operation for computing phone error rate. Currently
            unused: it was referenced only by a disabled `if False:`
            branch, which has been removed. The parameter is kept so
            that existing call sites stay valid.
        network: network to evaluate
        dataset: An instance of a `Dataset' class
        train_label_type: string, phone39 or phone48 or phone61
        eval_batch_size: int, the batch size when evaluating the model.
            If None, `dataset.batch_size` is used.
        is_progressbar: if True, visualize the progressbar
        is_multitask: if True, evaluate the multitask model
    Returns:
        per_global: An average of PER
    """
    if eval_batch_size is not None:
        batch_size = eval_batch_size
    else:
        batch_size = dataset.batch_size

    data_label_type = dataset.label_type

    # Round up so that a trailing, smaller mini-batch is evaluated too
    num_examples = dataset.data_num
    iteration = int(num_examples / batch_size)
    if (num_examples / batch_size) != iteration:
        iteration += 1
    per_global = 0

    # Make data generator
    mini_batch = dataset.next_batch(batch_size=batch_size)

    # train_label_type[5:7] is the phone count, e.g. '48' for 'phone48'
    phone2num_map_file_path = '../metric/mapping_files/ctc/phone2num_' + \
        train_label_type[5:7] + '.txt'
    phone2num_39_map_file_path = \
        '../metric/mapping_files/ctc/phone2num_39.txt'
    phone2phone_map_file_path = '../metric/mapping_files/phone2phone.txt'

    for _ in wrap_iterator(range(iteration), is_progressbar):
        # Create feed dictionary for next mini batch
        batch = next(mini_batch)
        if not is_multitask:
            inputs, labels_true_st, inputs_seq_len, _ = batch
        else:
            inputs, _, labels_true_st, inputs_seq_len, _ = batch

        feed_dict = {
            network.inputs: inputs,
            network.inputs_seq_len: inputs_seq_len,
            network.keep_prob_input: 1.0,
            network.keep_prob_hidden: 1.0
        }

        batch_size_each = len(inputs_seq_len)

        # Evaluate by 39 phones
        labels_pred_st = session.run(decode_op, feed_dict=feed_dict)
        labels_true = sparsetensor2list(labels_true_st, batch_size_each)
        labels_pred = sparsetensor2list(labels_pred_st, batch_size_each)
        for i_batch in range(batch_size_each):
            # Convert from num to phone (-> list of phone strings)
            phone_pred_list = num2phone(
                labels_pred[i_batch], phone2num_map_file_path).split(' ')

            # Mapping to 39 phones (-> list of phone strings)
            phone_pred_list = map_to_39phone(phone_pred_list,
                                             train_label_type,
                                             phone2phone_map_file_path)

            # Convert from phone to num (-> list of phone indices)
            labels_pred[i_batch] = phone2num(phone_pred_list,
                                             phone2num_39_map_file_path)

            if data_label_type != 'phone39':
                # Convert from num to phone (-> list of phone strings)
                # NOTE(review): the references are decoded with the
                # mapping file chosen from train_label_type, although
                # they are then mapped with data_label_type; confirm
                # both label types always match.
                phone_true_list = num2phone(
                    labels_true[i_batch],
                    phone2num_map_file_path).split(' ')

                # Mapping to 39 phones (-> list of phone strings)
                phone_true_list = map_to_39phone(
                    phone_true_list, data_label_type,
                    phone2phone_map_file_path)

                # Convert from phone to num (-> list of phone indices)
                labels_true[i_batch] = phone2num(
                    phone_true_list, phone2num_39_map_file_path)

        # Compute edit distance
        labels_true_st = list2sparsetensor(labels_true)
        labels_pred_st = list2sparsetensor(labels_pred)
        per_local = compute_edit_distance(session,
                                          labels_true_st,
                                          labels_pred_st)
        per_global += per_local * batch_size_each

    per_global /= dataset.data_num

    return per_global
def __init__(self, data_type, label_type, batch_size, eos_index,
             is_sorted=True, is_progressbar=False, num_gpu=1):
    """Load every input and label array of the dataset into memory.

    Args:
        data_type: string, train or dev or test
        label_type: string, phone39 or phone48 or phone61 or character
        batch_size: int, the size of mini-batch. The stored
            `self.batch_size` is `batch_size * num_gpu`.
        eos_index: int , the index of <EOS> class
        is_sorted: if True, sort dataset by frame num. Only stored here;
            the paths are sorted by frame num regardless of this flag.
        is_progressbar: if True, visualize progressbar
        num_gpu: int, the number of GPUs; multiplies the batch size
    Raises:
        ValueError: if data_type is not train, dev or test
    """
    if data_type not in ('train', 'dev', 'test'):
        raise ValueError('data_type is "train" or "dev" or "test".')

    self.data_type = data_type
    self.label_type = label_type
    self.batch_size = batch_size * num_gpu
    self.eos_index = eos_index
    self.is_sorted = is_sorted
    self.is_progressbar = is_progressbar
    self.num_gpu = num_gpu

    self.input_size = 123
    self.dataset_path = join(
        '/n/sd8/inaguma/corpus/timit/dataset/attention/',
        label_type, data_type)

    # Load the frame number dictionary
    self.frame_num_dict_path = join(self.dataset_path,
                                    'frame_num_dict.pickle')
    with open(self.frame_num_dict_path, 'rb') as f:
        self.frame_num_dict = pickle.load(f)

    # Sort paths to input & label by frame num
    self.frame_num_tuple_sorted = sorted(self.frame_num_dict.items(),
                                         key=lambda item: item[1])
    utt_names = [utt_name for utt_name, _ in self.frame_num_tuple_sorted]
    self.input_paths = np.array(
        [join(self.dataset_path, 'input', utt_name + '.npy')
         for utt_name in utt_names])
    self.label_paths = np.array(
        [join(self.dataset_path, 'label', utt_name + '.npy')
         for utt_name in utt_names])
    self.data_num = len(self.input_paths)

    # Load all dataset in advance
    print(''.join(['=> Loading ', data_type, ' dataset (',
                   label_type, ')...']))
    loaded_inputs, loaded_labels = [], []
    for i_utt in wrap_iterator(range(self.data_num), self.is_progressbar):
        loaded_inputs.append(np.load(self.input_paths[i_utt]))
        loaded_labels.append(np.load(self.label_paths[i_utt]))
    self.input_list = np.array(loaded_inputs)
    self.label_list = np.array(loaded_labels)

    self.rest = set(range(self.data_num))
def stack_frame(input_list, num_stack, num_skip, progressbar=False):
    """Stack & skip some frames.

    This implementation is based on https://arxiv.org/abs/1507.06947.
        Sak, Haşim, et al.
        "Fast and accurate recurrent neural network acoustic models for
         speech recognition."
        arXiv preprint arXiv:1507.06947 (2015).

    For every utterance, windows of up to `num_stack` consecutive frames
    are concatenated along the feature axis and the window advances by
    `num_skip` frames, giving ceil(frame_num / num_skip) output frames.
    Windows that run past the end of the utterance are left zero-padded.

    Args:
        input_list (list): list of input data; each element must expose
            a 2-D `.shape` of (frame_num, input_size)
        num_stack (int): the number of frames to stack
        num_skip (int): the number of frames to skip
        progressbar (bool, optional): if True, visualize progressbar
    Returns:
        input_list_new (np.ndarray): frame-stacked inputs. `input_list`
            itself is returned untouched when num_stack == num_skip == 1.
    Raises:
        ValueError: if num_stack < num_skip
    """
    # Nothing to do only when BOTH settings are 1 (the original compared
    # num_stack twice, which let num_stack=1 with num_skip>1 slip through
    # without any validation).
    if num_stack == 1 and num_skip == 1:
        return input_list

    if num_stack < num_skip:
        raise ValueError('num_skip must be less than num_stack.')

    input_list_new = []
    for i_batch in wrap_iterator(range(len(input_list)), progressbar):
        frames = input_list[i_batch]
        frame_num, input_size = frames.shape
        frame_num_new = int(math.ceil(frame_num / num_skip))

        stacked_frames = np.zeros((frame_num_new, input_size * num_stack))
        stack_count = 0  # next row of stacked_frames to fill
        stack = []
        for t, frame_t in enumerate(frames):
            if t == len(frames) - 1:
                # Final frame: keep emitting (possibly partial) windows
                # until every output row has been produced
                stack.append(frame_t)

                while stack_count != frame_num_new:
                    # Concatenate stacked frames
                    for i_stack, stacked in enumerate(stack):
                        stacked_frames[stack_count][
                            input_size * i_stack:
                            input_size * (i_stack + 1)] = stacked
                    stack_count += 1

                    # Delete some frames to skip (at most what is left)
                    del stack[:num_skip]

            elif len(stack) < num_stack:
                # First & middle frames: stack until the window is full
                stack.append(frame_t)

                if len(stack) == num_stack:
                    # Concatenate stacked frames
                    for i_stack, stacked in enumerate(stack):
                        stacked_frames[stack_count][
                            input_size * i_stack:
                            input_size * (i_stack + 1)] = stacked
                    stack_count += 1

                    # Delete some frames to skip
                    del stack[:num_skip]

        input_list_new.append(stacked_frames)

    # NOTE(review): utterances of different lengths make this a ragged
    # conversion; confirm the NumPy version in use still accepts that.
    return np.array(input_list_new)
def stack_frame(input_list, input_paths, frame_num_dict, num_stack,
                num_skip, progressbar=False):
    """Stack & skip some frames.

    This implementation is based on https://arxiv.org/abs/1507.06947.
        Sak, Haşim, et al.
        "Fast and accurate recurrent neural network acoustic models for
         speech recognition."
        arXiv preprint arXiv:1507.06947 (2015).

    For every utterance, windows of up to `num_stack` consecutive frames
    are concatenated along the feature axis and the window advances by
    `num_skip` frames. The number of output frames is the frame count
    taken from `frame_num_dict`, divided by `num_skip` and rounded up.
    Windows that run past the end of the utterance are left zero-padded.

    Args:
        input_list (list): list of input data
        input_paths (list): paths to input data. This is used to get the
            number of frames from frame_num_dict.
        frame_num_dict (dict):
            key (string) => utterance index
            value (int) => the number of frames
        num_stack (int): the number of frames to stack
        num_skip (int): the number of frames to skip
        progressbar (bool, optional): if True, visualize progressbar
    Returns:
        stacked_input_list (np.ndarray): frame-stacked inputs.
            `input_list` itself is returned untouched when
            num_stack == num_skip == 1.
    Raises:
        ValueError: if num_stack < num_skip
    """
    # Nothing to do only when BOTH settings are 1 (the original compared
    # num_stack twice, which let num_stack=1 with num_skip>1 slip through
    # without any validation).
    if num_stack == 1 and num_skip == 1:
        return input_list

    if num_stack < num_skip:
        raise ValueError('num_skip must be less than num_stack.')

    # The feature dimension is read from the first utterance only
    input_size = input_list[0].shape[1]
    stacked_input_list = []
    for i_utt in wrap_iterator(range(len(input_paths)), progressbar):
        # Per utterance: look the frame count up by the file name with
        # everything after the first '.' removed
        input_name = basename(input_paths[i_utt]).split('.')[0]
        frame_num = frame_num_dict[input_name]

        # Round frame_num / num_skip up to the next integer
        frame_num_decimated = frame_num / num_skip
        if frame_num_decimated != int(frame_num_decimated):
            frame_num_decimated += 1
        frame_num_decimated = int(frame_num_decimated)

        stacked_frames = np.zeros(
            (frame_num_decimated, input_size * num_stack))
        stack_count = 0  # next row of stacked_frames to fill
        stack = []
        utt_frames = input_list[i_utt]
        # NOTE(review): assumes frame_num_dict agrees with the actual
        # number of frames in utt_frames; confirm against the data prep.
        for i_frame, frame in enumerate(utt_frames):
            if i_frame == len(utt_frames) - 1:
                # Final frame: keep emitting (possibly partial) windows
                # until every output row has been produced
                stack.append(frame)

                while stack_count != frame_num_decimated:
                    # Concatenate stacked frames
                    for i_stack, stacked in enumerate(stack):
                        stacked_frames[stack_count][
                            input_size * i_stack:
                            input_size * (i_stack + 1)] = stacked
                    stack_count += 1

                    # Delete some frames to skip (at most what is left)
                    del stack[:num_skip]

            elif len(stack) < num_stack:
                # First & middle frames: stack until the window is full
                stack.append(frame)

                if len(stack) == num_stack:
                    # Concatenate stacked frames
                    for i_stack, stacked in enumerate(stack):
                        stacked_frames[stack_count][
                            input_size * i_stack:
                            input_size * (i_stack + 1)] = stacked
                    stack_count += 1

                    # Delete some frames to skip
                    del stack[:num_skip]

        stacked_input_list.append(stacked_frames)

    # NOTE(review): utterances of different lengths make this a ragged
    # conversion; confirm the NumPy version in use still accepts that.
    return np.array(stacked_input_list)
def do_eval_cer(session, decode_op, network, dataset, eval_batch_size=None,
                is_progressbar=False, is_multitask=False):
    """Evaluate trained model by Character Error Rate.

    Every utterance is decoded with `decode_op`, the reference and the
    hypothesis are converted to strings and printed, and the Levenshtein
    distance divided by the reference length is summed. The sum is
    finally divided by `dataset.data_num`.

    Args:
        session: session of training model
        decode_op: operation for decoding
        network: network to evaluate
        dataset: An instance of a `Dataset` class
        eval_batch_size: int, batch size when evaluating the model.
            If None, `dataset.batch_size` is used.
        is_progressbar: if True, visualize the progressbar
        is_multitask: if True, evaluate the multitask model (each
            mini-batch is then unpacked into six elements, not five)
    Return:
        cer_mean: An average of CER
    """
    batch_size = (dataset.batch_size if eval_batch_size is None
                  else eval_batch_size)

    # Make data generator
    batch_generator = dataset.next_batch(batch_size=batch_size)

    # Round the number of mini-batches up so that a trailing, smaller
    # mini-batch is evaluated as well
    batch_ratio = dataset.data_num / batch_size
    iteration = int(batch_ratio)
    if batch_ratio != iteration:
        iteration += 1

    cer_sum = 0
    map_file_path = '../metric/mapping_files/attention/char2num.txt'
    for _ in wrap_iterator(range(iteration), is_progressbar):
        # Create feed dictionary for next mini batch
        batch = next(batch_generator)
        if is_multitask:
            inputs, labels_true, _, inputs_seq_len, _, _ = batch
        else:
            inputs, labels_true, inputs_seq_len, _, _ = batch

        feed_dict = {
            network.inputs: inputs,
            network.inputs_seq_len: inputs_seq_len,
            network.keep_prob_input: 1.0,
            network.keep_prob_hidden: 1.0
        }

        batch_size_each = len(inputs_seq_len)
        predicted_ids = session.run(decode_op, feed_dict=feed_dict)
        for i_utt in range(batch_size_each):
            # Convert from list to string
            reference = num2char(labels_true[i_utt], map_file_path)
            hypothesis = num2char(predicted_ids[i_utt], map_file_path)

            # Remove silence(_) labels
            # NOTE(review): '<' and '>' are stripped from the reference
            # only, not from the hypothesis; confirm this is intended.
            reference = re.sub(r'[_<>]+', "", reference)
            hypothesis = re.sub(r'[_]+', "", hypothesis)

            print(reference)
            print(hypothesis)

            # Compute edit distance, normalized by the reference length
            cer_sum += Levenshtein.distance(
                hypothesis, reference) / len(reference)

    cer_mean = cer_sum / dataset.data_num

    return cer_mean