def plot_probs(model, dataset, eval_batch_size, save_path=None):
    """Plot the CTC posteriors of every utterance in one epoch of `dataset`.

    Args:
        model: model to evaluate; `model.posteriors(xs, x_lens, temperature)`
            is called once per mini-batch
        dataset: An instance of a `Dataset` class, iterated as
            `(batch, is_new_epoch)` pairs
        eval_batch_size (int): the batch size when evaluating the model
            (kept for interface compatibility; not used in this function)
        save_path (string, optional): directory to save figures of CTC
            posteriors. If None, no directory is created and None is passed
            on to `plot_ctc_probs`.
    """
    if save_path is not None:
        # Start from an empty output directory
        if isdir(save_path):
            shutil.rmtree(save_path)
        mkdir(save_path)

    for batch, is_new_epoch in dataset:
        # Get CTC probs
        probs = model.posteriors(batch['xs'], batch['x_lens'], temperature=1)
        # NOTE: probs: '[B, T, num_classes]'

        # Visualize
        for b in range(len(batch['xs'])):
            frame_num = batch['x_lens'][b]

            # join() raises TypeError on None, so only build a file path when
            # an output directory was given.
            # NOTE(review): assumes `plot_ctc_probs` accepts save_path=None
            # -- confirm against its definition.
            if save_path is None:
                fig_path = None
            else:
                fig_path = join(save_path, batch['input_names'][b] + '.png')

            plot_ctc_probs(
                probs[b, :frame_num, :],
                frame_num=frame_num,
                num_stack=dataset.num_stack,
                # Only the first 40 feature dimensions are drawn
                spectrogram=batch['xs'][b, :, :40],
                save_path=fig_path,
                figsize=(14, 7))

        if is_new_epoch:
            break
def plot(model, dataset, beam_width, eval_batch_size=None, save_path=None):
    """Visualize attention weights of an attention-based model.

    Args:
        model: model to evaluate; `model.attention_weights(...)` is called
            once per mini-batch
        dataset: An instance of a `Dataset` class, iterated as
            `(batch, is_new_epoch)` pairs
        beam_width: (int): the size of beam
        eval_batch_size (int, optional): the batch size when evaluating the
            model (kept for interface compatibility; not used here)
        save_path (string, optional): directory to save the attention weight
            figures. If None, no directory is created and None is passed on
            to `plot_attention_weights`.
    """
    if save_path is not None:
        # Start from an empty output directory
        if isdir(save_path):
            shutil.rmtree(save_path)
        mkdir(save_path)

    # Character-level and word-level outputs use different index maps and
    # different decoding length limits
    if 'char' in dataset.label_type:
        map_fn = Idx2char(
            dataset.vocab_file_path,
            capital_divide=dataset.label_type == 'character_capital_divide',
            return_list=True)
        max_decode_len = MAX_DECODE_LEN_CHAR
    else:
        map_fn = Idx2word(dataset.vocab_file_path, return_list=True)
        max_decode_len = MAX_DECODE_LEN_WORD

    for batch, is_new_epoch in dataset:
        # Decode
        best_hyps, aw, perm_idx = model.attention_weights(
            batch['xs'], batch['x_lens'],
            beam_width=beam_width,
            max_decode_len=max_decode_len)

        # Reorder the references with the permutation returned by the model
        ys = batch['ys'][perm_idx]
        y_lens = batch['y_lens'][perm_idx]

        for b in range(len(batch['xs'])):
            ##############################
            # Reference
            ##############################
            if dataset.is_test:
                str_ref = ys[b][0]
                # NOTE: transcript is separated by space('_')
            else:
                # Convert from list of index to string
                str_ref = map_fn(ys[b][:y_lens[b]])

            token_list = map_fn(best_hyps[b])
            input_name = batch['input_names'][b]
            # The first two '_'-separated fields name the output sub-directory
            speaker = '_'.join(input_name.split('_')[:2])

            # NOTE(review): assumes `plot_attention_weights` accepts
            # save_path=None -- confirm against its definition.
            if save_path is None:
                fig_path = None
            else:
                fig_path = mkdir_join(save_path, speaker, input_name + '.png')

            plot_attention_weights(
                aw[b, :len(token_list), :batch['x_lens'][b]],
                label_list=token_list,
                spectrogram=batch['xs'][b, :, :dataset.input_freq],
                str_ref=str_ref,
                save_path=fig_path,
                figsize=(20, 8))

        if is_new_epoch:
            break
def plot(model, dataset, eval_batch_size, save_path=None):
    """Plot main-task and sub-task CTC posteriors of a hierarchical CTC model.

    Args:
        model: the model to evaluate; `model.posteriors(...)` and
            `model.decode(...)` are each called for the main and the sub task
        dataset: An instance of a `Dataset` class, iterated as
            `(batch, is_new_epoch)` pairs
        eval_batch_size (int): the batch size when evaluating the model.
            If not None, it overwrites `dataset.batch_size`.
        save_path (string, optional): directory to save figures of CTC
            posteriors. If None, no directory is created and None is passed
            on to `plot_hierarchical_ctc_probs`.
    """
    # Set batch size in the evaluation
    if eval_batch_size is not None:
        dataset.batch_size = eval_batch_size

    # Clean directory (isdir(None) raises TypeError, hence the None guard)
    if save_path is not None:
        if isdir(save_path):
            shutil.rmtree(save_path)
        mkdir(save_path)

    idx2word = Idx2word(dataset.vocab_file_path)
    # NOTE(review): the character map is built from the main-task vocabulary
    # file while `capital_divide` is derived from `label_type_sub`; confirm
    # whether a sub-task vocabulary file was intended here.
    idx2char = Idx2char(
        dataset.vocab_file_path,
        capital_divide=dataset.label_type_sub == 'character_capital_divide')

    for batch, is_new_epoch in dataset:
        xs, x_lens = batch['xs'], batch['x_lens']

        # Get CTC probs
        probs = model.posteriors(xs, x_lens, temperature=1)
        probs_sub = model.posteriors(xs, x_lens, is_sub_task=True,
                                     temperature=1)
        # NOTE: probs: '[B, T, num_classes]'
        # NOTE: probs_sub: '[B, T, num_classes_sub]'

        # Decode
        best_hyps = model.decode(xs, x_lens, beam_width=1)
        best_hyps_sub = model.decode(xs, x_lens, beam_width=1,
                                     is_sub_task=True)

        # Visualize
        for b in range(len(xs)):
            # Convert from list of index to string
            str_hyp = idx2word(best_hyps[b])
            str_hyp_sub = idx2char(best_hyps_sub[b])

            input_name = batch['input_names'][b]
            speaker = input_name.split('_')[0]
            frame_num = x_lens[b]

            # NOTE(review): assumes `plot_hierarchical_ctc_probs` accepts
            # save_path=None -- confirm against its definition.
            if save_path is None:
                fig_path = None
            else:
                fig_path = mkdir_join(save_path, speaker, input_name + '.png')

            plot_hierarchical_ctc_probs(
                probs[b, :frame_num, :],
                probs_sub[b, :frame_num, :],
                frame_num=frame_num,
                num_stack=dataset.num_stack,
                str_hyp=str_hyp,
                str_hyp_sub=str_hyp_sub,
                save_path=fig_path)

        if is_new_epoch:
            break
def plot_attention(model, dataset, eval_batch_size, beam_width,
                   length_penalty, save_path=None):
    """Visualize attention weights of the attention-based model.

    Args:
        model: model to evaluate; `model.decode(...)` is called once per
            mini-batch and returns `(best_hyps, aw, perm_idx)`
        dataset: An instance of a `Dataset` class, iterated as
            `(batch, is_new_epoch)` pairs
        eval_batch_size (int): the batch size when evaluating the model
            (kept for interface compatibility; not used here)
        beam_width: (int): the size of beam
        length_penalty (float): forwarded unchanged to `model.decode`
        save_path (string, optional): directory to save the attention weight
            figures. If None, no directory is created and None is passed on
            to `plot_attention_weights`.
    """
    if save_path is not None:
        # Start from an empty output directory
        if isdir(save_path):
            shutil.rmtree(save_path)
        mkdir(save_path)

    idx2phone = Idx2phone(dataset.vocab_file_path)

    for batch, is_new_epoch in dataset:
        # Decode
        best_hyps, aw, perm_idx = model.decode(
            batch['xs'], batch['x_lens'],
            beam_width=beam_width,
            max_decode_len=MAX_DECODE_LEN_PHONE,
            length_penalty=length_penalty)

        # Reorder the references with the permutation returned by the model
        ys = batch['ys'][perm_idx]
        y_lens = batch['y_lens'][perm_idx]

        for b in range(len(batch['xs'])):
            ##############################
            # Reference
            ##############################
            if dataset.is_test:
                str_ref = ys[b][0]
                # NOTE: transcript is separated by space(' ')
            else:
                # Convert from list of index to string
                str_ref = idx2phone(ys[b][:y_lens[b]])

            token_list = idx2phone(best_hyps[b])

            # join() raises TypeError on None, so only build a file path when
            # an output directory was given.
            # NOTE(review): assumes `plot_attention_weights` accepts
            # save_path=None -- confirm against its definition.
            if save_path is None:
                fig_path = None
            else:
                fig_path = join(save_path, batch['input_names'][b] + '.png')

            plot_attention_weights(
                aw[b][:len(token_list), :batch['x_lens'][b]],
                label_list=token_list,
                # Only the first 40 feature dimensions are drawn
                spectrogram=batch['xs'][b, :, :40],
                str_ref=str_ref,
                save_path=fig_path,
                figsize=(20, 8))

        if is_new_epoch:
            break
def plot(model, dataset, eval_batch_size=None, save_path=None,
         space_index=None):
    """Plot the CTC posteriors together with the greedy decoding result.

    Args:
        model: the model to evaluate; `model.posteriors(...)` and
            `model.decode(...)` are called once per mini-batch
        dataset: An instance of a `Dataset` class, iterated as
            `(batch, is_new_epoch)` pairs
        eval_batch_size (int, optional): the batch size when evaluating the
            model. If not None, it overwrites `dataset.batch_size`.
        save_path (string, optional): directory to save figures of CTC
            posteriors. If None, no directory is created and None is passed
            on to `plot_ctc_probs`.
        space_index (int, optional): forwarded unchanged to `plot_ctc_probs`
    """
    # Set batch size in the evaluation
    if eval_batch_size is not None:
        dataset.batch_size = eval_batch_size

    # Clean directory (isdir(None) raises TypeError, hence the None guard)
    if save_path is not None:
        if isdir(save_path):
            shutil.rmtree(save_path)
        mkdir(save_path)

    # The vocabulary file is looked up relative to the working directory
    vocab_file_path = '../metrics/vocab_files/' + \
        dataset.label_type + '_' + dataset.data_size + '.txt'
    if dataset.label_type == 'character':
        map_fn = Idx2char(vocab_file_path)
    elif dataset.label_type == 'character_capital_divide':
        map_fn = Idx2char(vocab_file_path, capital_divide=True)
    else:
        map_fn = Idx2word(vocab_file_path)

    for batch, is_new_epoch in dataset:
        # Get CTC probs
        probs = model.posteriors(batch['xs'], batch['x_lens'], temperature=1)
        # NOTE: probs: '[B, T, num_classes]'

        # Decode (the second returned value is not needed here)
        best_hyps, _ = model.decode(batch['xs'], batch['x_lens'],
                                    beam_width=1)

        # Visualize
        for b in range(len(batch['xs'])):
            # Convert from list of index to string
            str_pred = map_fn(best_hyps[b])

            input_name = batch['input_names'][b]
            # The first two '-'-separated fields name the output
            # sub-directories
            speaker, book = input_name.split('-')[:2]
            frame_num = batch['x_lens'][b]

            # NOTE(review): assumes `plot_ctc_probs` accepts save_path=None
            # -- confirm against its definition.
            if save_path is None:
                fig_path = None
            else:
                fig_path = mkdir_join(save_path, speaker, book,
                                      input_name + '.png')

            plot_ctc_probs(
                probs[b, :frame_num, :],
                frame_num=frame_num,
                num_stack=dataset.num_stack,
                space_index=space_index,
                str_pred=str_pred,
                save_path=fig_path)

        if is_new_epoch:
            break
def plot(model, dataset, eval_batch_size, beam_width, beam_width_sub,
         length_penalty, save_path=None):
    """Visualize attention weights of a hierarchical attention-based model.

    Args:
        model: model to evaluate; `model.decode(...)` is called once for the
            main task and once with `task_index=1` for the sub task
        dataset: An instance of a `Dataset` class, iterated as
            `(batch, is_new_epoch)` pairs
        eval_batch_size (int): the batch size when evaluating the model
            (kept for interface compatibility; not used here)
        beam_width: (int): the size of beam in the main task
        beam_width_sub: (int): the size of beam in the sub task
        length_penalty (float): kept for interface compatibility; not used
            in this function
        save_path (string, optional): directory to save the attention weight
            figures. If None, no directory is created and None is passed on
            to `plot_hierarchical_attention_weights`.
    """
    if save_path is not None:
        # Start from an empty output directory
        if isdir(save_path):
            shutil.rmtree(save_path)
        mkdir(save_path)

    map_fn_main = Idx2word(dataset.vocab_file_path, return_list=True)
    map_fn_sub = Idx2char(dataset.vocab_file_path_sub, return_list=True)

    for batch, is_new_epoch in dataset:
        # Decode (main task: words, sub task: characters)
        best_hyps, aw, perm_idx = model.decode(
            batch['xs'], batch['x_lens'],
            beam_width=beam_width,
            max_decode_len=MAX_DECODE_LEN_WORD)
        best_hyps_sub, aw_sub, _ = model.decode(
            batch['xs'], batch['x_lens'],
            beam_width=beam_width_sub,
            max_decode_len=MAX_DECODE_LEN_CHAR,
            task_index=1)

        for b in range(len(batch['xs'])):
            word_list = map_fn_main(best_hyps[b])
            char_list = map_fn_sub(best_hyps_sub[b])

            input_name = batch['input_names'][b]
            speaker = input_name.split('_')[0]
            frame_num = batch['x_lens'][b]

            # NOTE(review): assumes `plot_hierarchical_attention_weights`
            # accepts save_path=None -- confirm against its definition.
            if save_path is None:
                fig_path = None
            else:
                fig_path = mkdir_join(save_path, speaker, input_name + '.png')

            plot_hierarchical_attention_weights(
                aw[b][:len(word_list), :frame_num],
                aw_sub[b][:len(char_list), :frame_num],
                label_list=word_list,
                label_list_sub=char_list,
                spectrogram=batch['xs'][b, :, :dataset.input_freq],
                save_path=fig_path,
                figsize=(40, 8))

        if is_new_epoch:
            break
def set_save_path(self, save_path):
    """Pick an unused model directory derived from `save_path` and create it.

    A candidate directory counts as used when it contains 'complete.txt'
    (training finished) or 'config.yml' (training started but not finished).
    Candidates are tried in the order `save_path`, `save_path + '_1'`,
    `save_path + '_2'`, ... and the first unused one is passed to `mkdir`;
    the value `mkdir` returns is stored in `self.save_path`.

    Args:
        save_path (string): the preferred model directory
    """
    markers = ('complete.txt', 'config.yml')
    candidate = save_path
    suffix = 0
    while any(isfile(join(candidate, marker)) for marker in markers):
        # Already claimed by another run: move on to the next suffix
        suffix += 1
        candidate = save_path + '_' + str(suffix)

    self.save_path = mkdir(candidate)
def main(config_path, model_save_path):
    """Build a multitask CTC model from a config file and train it.

    The model directory is derived from the hyperparameters; if a directory
    of that name already holds a finished ('complete.txt') or started
    ('config.yml') run, a numeric suffix is appended. The config file is
    copied into the directory and stdout is redirected to 'train.log' there
    for the duration of training.

    Args:
        config_path (string): path to the config file (.yml)
        model_save_path (string): root directory under which the model
            directory is created
    """
    # Load a config file (.yml)
    # safe_load: yaml.load() without an explicit Loader is unsafe and is
    # rejected by recent PyYAML releases
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)

    params = config['param']

    # Except for a blank class
    # Label types missing from the tables leave the entry untouched, so a
    # value given directly in the config is still honored.
    num_classes_main = {'character': 28, 'character_capital_divide': 72}
    num_classes_sub = {'phone61': 61, 'phone48': 48, 'phone39': 39}
    if params['label_type_main'] in num_classes_main:
        params['num_classes_main'] = num_classes_main[
            params['label_type_main']]
    if params['label_type_sub'] in num_classes_sub:
        params['num_classes_sub'] = num_classes_sub[params['label_type_sub']]

    # Model setting
    model = Multitask_CTC(
        encoder_type=params['encoder_type'],
        input_size=params['input_size'] * params['num_stack'],
        num_units=params['num_units'],
        num_layers_main=params['num_layers_main'],
        num_layers_sub=params['num_layers_sub'],
        num_classes_main=params['num_classes_main'],
        num_classes_sub=params['num_classes_sub'],
        main_task_weight=params['main_task_weight'],
        lstm_impl=params['lstm_impl'],
        use_peephole=params['use_peephole'],
        parameter_init=params['weight_init'],
        clip_grad=params['clip_grad'],
        clip_activation=params['clip_activation'],
        num_proj=params['num_proj'],
        weight_decay=params['weight_decay'])

    # Set process name (uses the bare model name, before the suffixes below)
    setproctitle('timit_' + model.name + '_' +
                 params['label_type_main'] + '_' + params['label_type_sub'])

    # Encode the hyperparameters into the model name
    model.name += '_' + str(params['num_units'])
    model.name += '_main' + str(params['num_layers_main'])
    model.name += '_sub' + str(params['num_layers_sub'])
    model.name += '_' + params['optimizer']
    model.name += '_lr' + str(params['learning_rate'])
    if params['num_proj'] != 0:
        model.name += '_proj' + str(params['num_proj'])
    if params['dropout_input'] != 1:
        model.name += '_dropi' + str(params['dropout_input'])
    if params['dropout_hidden'] != 1:
        model.name += '_droph' + str(params['dropout_hidden'])
    if params['num_stack'] != 1:
        model.name += '_stack' + str(params['num_stack'])
    if params['weight_decay'] != 0:
        model.name += '_wd' + str(params['weight_decay'])
    model.name += '_main' + str(params['main_task_weight'])

    # Set save path
    model.save_path = mkdir_join(
        model_save_path, 'ctc', 'char_' + params['label_type_sub'],
        model.name)

    # Reset model directory: skip directories that already hold a finished
    # (complete.txt) or an unfinished (config.yml) run
    model_index = 0
    new_model_path = model.save_path
    while (isfile(join(new_model_path, 'complete.txt')) or
           isfile(join(new_model_path, 'config.yml'))):
        model_index += 1
        new_model_path = model.save_path + '_' + str(model_index)
    model.save_path = mkdir(new_model_path)

    # Save config file
    shutil.copyfile(config_path, join(model.save_path, 'config.yml'))

    # Redirect stdout to the training log; always restore it and close the
    # file, even when training fails
    # TODO(hirofumi): change to logger
    stdout_orig = sys.stdout
    with open(join(model.save_path, 'train.log'), 'w') as log_file:
        sys.stdout = log_file
        try:
            do_train(model=model, params=params)
        finally:
            sys.stdout = stdout_orig
def plot(model, dataset, beam_width, beam_width_sub,
         eval_batch_size=None, a2c_oracle=False, save_path=None):
    """Visualize attention weights of a nested attention-based model.

    Two figures are produced per utterance: word-to-acoustic together with
    character-to-acoustic attention, and word-to-character attention.

    Args:
        model: model to evaluate; `model.attention_weights(...)` is called
            once per mini-batch
        dataset: An instance of a `Dataset` class, iterated as
            `(batch, is_new_epoch)` pairs
        beam_width: (int): the size of beam in the main task
        beam_width_sub: (int): the size of beam in the sub task
        eval_batch_size (int, optional): the batch size when evaluating the
            model (kept for interface compatibility; not used here)
        a2c_oracle (bool, optional): if True, the sub-task reference labels
            are passed to the model and `teacher_forcing` is enabled
        save_path (string, optional): directory to save the attention weight
            figures. If None, no directory is created and None is passed on
            to the plotting helpers.
    """
    if save_path is not None:
        # Start from an empty output directory
        if isdir(save_path):
            shutil.rmtree(save_path)
        mkdir(save_path)

    idx2word = Idx2word(dataset.vocab_file_path, return_list=True)
    idx2char = Idx2char(dataset.vocab_file_path_sub, return_list=True)

    for batch, is_new_epoch in dataset:
        batch_size = len(batch['xs'])

        if a2c_oracle:
            if dataset.is_test:
                # Test references are raw transcripts: convert them to
                # indices and pad to the longest transcript in the batch
                max_label_num = 0
                for b in range(batch_size):
                    label_num = len(list(batch['ys_sub'][b][0]))
                    if max_label_num < label_num:
                        max_label_num = label_num

                ys_sub = np.zeros((batch_size, max_label_num),
                                  dtype=np.int32)
                ys_sub -= 1  # pad with -1
                y_lens_sub = np.zeros((batch_size, ), dtype=np.int32)
                for b in range(batch_size):
                    # NOTE(review): `char2idx` is not defined in this
                    # function; it has to come from the enclosing module --
                    # confirm it exists.
                    indices = char2idx(batch['ys_sub'][b][0])
                    ys_sub[b, :len(indices)] = indices
                    y_lens_sub[b] = len(indices)
                    # NOTE: transcript is separated by space('_')
            else:
                ys_sub = batch['ys_sub']
                y_lens_sub = batch['y_lens_sub']
        else:
            ys_sub = None
            y_lens_sub = None

        best_hyps, best_hyps_sub, aw, aw_sub, aw_dec = model.attention_weights(
            batch['xs'], batch['x_lens'],
            beam_width=beam_width,
            beam_width_sub=beam_width_sub,
            max_decode_len=MAX_DECODE_LEN_WORD,
            max_decode_len_sub=MAX_DECODE_LEN_CHAR,
            teacher_forcing=a2c_oracle,
            ys_sub=ys_sub,
            y_lens_sub=y_lens_sub)

        for b in range(batch_size):
            word_list = idx2word(best_hyps[b])
            # The sub task may itself be word-level
            if 'word' in dataset.label_type_sub:
                char_list = idx2word(best_hyps_sub[b])
            else:
                char_list = idx2char(best_hyps_sub[b])

            input_name = batch['input_names'][b]
            frame_num = batch['x_lens'][b]

            # NOTE(review): assumes both plotting helpers accept
            # save_path=None -- confirm against their definitions.
            if save_path is None:
                fig_path = None
                fig_path_word2char = None
            else:
                fig_path = mkdir_join(save_path, input_name + '.png')
                fig_path_word2char = mkdir_join(
                    save_path, input_name + '_word2char.png')

            # word to acoustic & character to acoustic
            plot_hierarchical_attention_weights(
                aw[b][:len(word_list), :frame_num],
                aw_sub[b][:len(char_list), :frame_num],
                label_list=word_list,
                label_list_sub=char_list,
                spectrogram=batch['xs'][b, :, :dataset.input_freq],
                save_path=fig_path,
                figsize=(40, 8))

            # word to character
            plot_word2char_attention_weights(
                aw_dec[b][:len(word_list), :len(char_list)],
                label_list=word_list,
                label_list_sub=char_list,
                save_path=fig_path_word2char,
                figsize=(40, 8))

        if is_new_epoch:
            break
def main(config_path, model_save_path='/n/sd8/inaguma/result/csj/monolog/'):
    """Build a multitask CTC model for CSJ from a config file and train it.

    The model directory is derived from the hyperparameters. An existing
    directory without 'complete.txt' is deleted and recreated; one that
    contains 'complete.txt' aborts the run. stdout is redirected to
    'train.log' in the model directory during training.

    Args:
        config_path (string): path to the config file (.yml)
        model_save_path (string, optional): root directory for results;
            defaults to the location that used to be hard-coded here

    Raises:
        ValueError: if a label type in the config is not supported, or if
            the model directory already holds a finished run
    """
    # Read a config file (.yml)
    # safe_load: yaml.load() without an explicit Loader is unsafe and is
    # rejected by recent PyYAML releases
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)
    corpus = config['corpus']
    feature = config['feature']
    param = config['param']

    # Fail early with a clear message instead of an unbound local later
    output_sizes_main = {'character': 147, 'kanji': 3386}
    output_sizes_second = {'phone': 38, 'character': 147}
    if corpus['label_type_main'] not in output_sizes_main:
        raise ValueError(
            'Unsupported label_type_main: %s' % corpus['label_type_main'])
    if corpus['label_type_second'] not in output_sizes_second:
        raise ValueError(
            'Unsupported label_type_second: %s' % corpus['label_type_second'])
    output_size_main = output_sizes_main[corpus['label_type_main']]
    output_size_second = output_sizes_second[corpus['label_type_second']]

    # Model setting
    CTCModel = load(model_type=config['model_name'])
    network = CTCModel(
        batch_size=param['batch_size'],
        input_size=feature['input_size'] * feature['num_stack'],
        num_unit=param['num_unit'],
        num_layer_main=param['num_layer_main'],
        num_layer_second=param['num_layer_second'],
        output_size_main=output_size_main,
        output_size_second=output_size_second,
        main_task_weight=param['main_task_weight'],
        parameter_init=param['weight_init'],
        clip_grad=param['clip_grad'],
        clip_activation=param['clip_activation'],
        dropout_ratio_input=param['dropout_input'],
        dropout_ratio_hidden=param['dropout_hidden'],
        num_proj=param['num_proj'],
        weight_decay=param['weight_decay'])

    # Encode the hyperparameters into the model name
    network.model_name = config['model_name'].upper()
    network.model_name += '_' + str(param['num_unit'])
    network.model_name += '_main' + str(param['num_layer_main'])
    network.model_name += '_second' + str(param['num_layer_second'])
    network.model_name += '_' + param['optimizer']
    network.model_name += '_lr' + str(param['learning_rate'])
    if param['bottleneck_dim'] != 0:
        network.model_name += '_bottoleneck' + str(param['bottleneck_dim'])
    if param['num_proj'] != 0:
        network.model_name += '_proj' + str(param['num_proj'])
    if feature['num_stack'] != 1:
        network.model_name += '_stack' + str(feature['num_stack'])
    if param['weight_decay'] != 0:
        network.model_name += '_weightdecay' + str(param['weight_decay'])
    network.model_name += '_taskweight' + str(param['main_task_weight'])
    if corpus['train_data_size'] == 'large':
        network.model_name += '_large'

    # Set save path
    network.model_dir = mkdir(model_save_path)
    network.model_dir = mkdir_join(network.model_dir, 'ctc')
    network.model_dir = mkdir_join(
        network.model_dir,
        corpus['label_type_main'] + '_' + corpus['label_type_second'])
    network.model_dir = mkdir_join(network.model_dir, network.model_name)

    # Reset model directory: never overwrite a finished run
    if isfile(join(network.model_dir, 'complete.txt')):
        raise ValueError('File exists.')
    tf.gfile.DeleteRecursively(network.model_dir)
    tf.gfile.MakeDirs(network.model_dir)

    # Set process name
    setproctitle('multitaskctc_csj_' + corpus['label_type_main'] + '_' +
                 corpus['label_type_second'] + '_' +
                 corpus['train_data_size'])

    # Save config file
    shutil.copyfile(config_path, join(network.model_dir, 'config.yml'))

    # Redirect stdout to the training log; restore it and close the file
    # even when training fails
    with open(join(network.model_dir, 'train.log'), 'w') as log_file:
        sys.stdout = log_file
        try:
            print(network.model_name)
            do_train(network=network,
                     optimizer=param['optimizer'],
                     learning_rate=param['learning_rate'],
                     batch_size=param['batch_size'],
                     epoch_num=param['num_epoch'],
                     label_type_main=corpus['label_type_main'],
                     label_type_second=corpus['label_type_second'],
                     num_stack=feature['num_stack'],
                     num_skip=feature['num_skip'],
                     train_data_size=corpus['train_data_size'])
        finally:
            sys.stdout = sys.__stdout__
def main():
    """Plot word-level and character-level attention weights on 'eval1'.

    Restores a trained hierarchical attention model from `args.model_path`,
    decodes one epoch of the 'eval1' set for the main task and for the sub
    task (`task_index=1`), and writes one figure per utterance under
    '<model_path>/att_weights/<speaker>/'.
    """
    args = parser.parse_args()

    # Load a config file (.yml)
    params = load_config(join(args.model_path, 'config.yml'), is_eval=True)

    # Load dataset
    dataset = Dataset(
        data_save_path=args.data_save_path,
        backend=params['backend'],
        input_freq=params['input_freq'],
        use_delta=params['use_delta'],
        use_double_delta=params['use_double_delta'],
        data_type='eval1',
        data_size=params['data_size'],
        label_type=params['label_type'],
        label_type_sub=params['label_type_sub'],
        batch_size=args.eval_batch_size,
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_skip=params['num_skip'],
        sort_utt=False,
        reverse=False,
        tool=params['tool'])

    # The output layer sizes come from the dataset vocabulary
    params['num_classes'] = dataset.num_classes
    params['num_classes_sub'] = dataset.num_classes_sub

    # Load model
    model = load(model_type=params['model_type'],
                 params=params,
                 backend=params['backend'])

    # Restore the saved parameters
    model.load_checkpoint(save_path=args.model_path, epoch=args.epoch)

    # GPU setting
    model.set_cuda(deterministic=False, benchmark=True)

    save_path = mkdir_join(args.model_path, 'att_weights')

    # Clean directory
    if save_path is not None and isdir(save_path):
        shutil.rmtree(save_path)
        mkdir(save_path)

    for batch, is_new_epoch in dataset:
        features = batch['xs']
        frame_counts = batch['x_lens']

        # Decode the main (word) task
        best_hyps, aw, perm_idx = model.decode(
            features, frame_counts,
            beam_width=args.beam_width,
            max_decode_len=MAX_DECODE_LEN_WORD,
            min_decode_len=MIN_DECODE_LEN_WORD,
            length_penalty=args.length_penalty,
            coverage_penalty=args.coverage_penalty)

        # Decode the sub (character) task
        best_hyps_sub, aw_sub, _ = model.decode(
            features, frame_counts,
            beam_width=args.beam_width_sub,
            max_decode_len=MAX_DECODE_LEN_CHAR,
            min_decode_len=MIN_DECODE_LEN_CHAR,
            length_penalty=args.length_penalty,
            coverage_penalty=args.coverage_penalty,
            task_index=1)

        for utt in range(len(features)):
            words = dataset.idx2word(best_hyps[utt], return_list=True)
            chars = dataset.idx2char(best_hyps_sub[utt], return_list=True)

            utt_name = batch['input_names'][utt]
            speaker = utt_name.split('_')[0]
            num_frames = frame_counts[utt]

            plot_hierarchical_attention_weights(
                aw[utt][:len(words), :num_frames],
                aw_sub[utt][:len(chars), :num_frames],
                label_list=words,
                label_list_sub=chars,
                spectrogram=features[utt, :, :dataset.input_freq],
                save_path=mkdir_join(save_path, speaker, utt_name + '.png'),
                figsize=(40, 8))

        if is_new_epoch:
            break
def plot_attention(model, dataset, max_decode_len,
                   eval_batch_size=None, save_path=None):
    """Visualize attention weights of an attention-based model.

    Args:
        model: model to evaluate; `model.attention_weights(...)` is called
            once per mini-batch
        dataset: An instance of a `Dataset` class, iterated as
            `(batch, is_new_epoch)` pairs
        max_decode_len (int): the length of output sequences to stop
            prediction when EOS token have not been emitted.
        eval_batch_size (int, optional): the batch size when evaluating the
            model. If not None, it overwrites `dataset.batch_size`.
        save_path (string, optional): directory to save the attention weight
            figures. If None, no directory is created and None is passed on
            to `plot_attention_weights`.
    """
    # Set batch size in the evaluation
    if eval_batch_size is not None:
        dataset.batch_size = eval_batch_size

    # Clean directory (isdir(None) raises TypeError, hence the None guard)
    if save_path is not None:
        if isdir(save_path):
            shutil.rmtree(save_path)
        mkdir(save_path)

    # The vocabulary file is looked up relative to the working directory
    vocab_file_path = '../metrics/vocab_files/' + \
        dataset.label_type + '_' + dataset.data_size + '.txt'
    if 'char' in dataset.label_type:
        map_fn = Idx2char(vocab_file_path)
    else:
        map_fn = Idx2word(vocab_file_path)

    for batch, is_new_epoch in dataset:
        # Decode
        best_hyps, att_weights = model.attention_weights(
            batch['xs'], batch['x_lens'], max_decode_len=max_decode_len)
        # NOTE: attention_weights: `[B, T_out, T_in]`

        # Visualize
        for b in range(len(batch['xs'])):
            str_pred = map_fn(best_hyps[b])

            # Truncate at <EOS> ('>'), remembering whether it was emitted
            eos = '>' in str_pred
            str_pred = str_pred.split('>')[0]

            # Remove the last space
            if len(str_pred) > 0 and str_pred[-1] == '_':
                str_pred = str_pred[:-1]

            # Put <EOS> back as a token of its own
            if eos:
                str_pred += '_>'

            label_list = str_pred.split('_')
            input_name = batch['input_names'][b]
            speaker = input_name.split('_')[0]

            # NOTE(review): assumes `plot_attention_weights` accepts
            # save_path=None -- confirm against its definition.
            if save_path is None:
                fig_path = None
            else:
                fig_path = mkdir_join(save_path, speaker, input_name + '.png')

            plot_attention_weights(
                attention_weights=att_weights[
                    b, :len(label_list), :batch['x_lens'][b]],
                label_list=label_list,
                save_path=fig_path,
                figsize=(20, 8))

        if is_new_epoch:
            break
def main():
    """Plot the attention weights of a trained model on the 'eval1' set.

    Restores a trained attention model from `args.model_path`, decodes one
    epoch of the 'eval1' set and writes one figure per utterance under
    '<model_path>/att_weights/<speaker>/', each annotated with the reference
    transcript.
    """
    args = parser.parse_args()

    # Load a config file (.yml)
    params = load_config(join(args.model_path, 'config.yml'), is_eval=True)

    # Load dataset
    dataset = Dataset(
        data_save_path=args.data_save_path,
        backend=params['backend'],
        input_freq=params['input_freq'],
        use_delta=params['use_delta'],
        use_double_delta=params['use_double_delta'],
        data_type='eval1',
        data_size=params['data_size'],
        label_type=params['label_type'],
        batch_size=args.eval_batch_size,
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_skip=params['num_skip'],
        sort_utt=False,
        reverse=False,
        tool=params['tool'])

    # The output layer size comes from the dataset vocabulary
    params['num_classes'] = dataset.num_classes

    # Load model
    model = load(model_type=params['model_type'],
                 params=params,
                 backend=params['backend'])

    # Restore the saved parameters
    model.load_checkpoint(save_path=args.model_path, epoch=args.epoch)

    # GPU setting
    model.set_cuda(deterministic=False, benchmark=True)

    save_path = mkdir_join(args.model_path, 'att_weights')

    # Clean directory
    if save_path is not None and isdir(save_path):
        shutil.rmtree(save_path)
        mkdir(save_path)

    # Word-level and character-level outputs use different index maps and
    # decoding length limits
    is_word_level = dataset.label_type == 'word'
    map_fn = dataset.idx2word if is_word_level else dataset.idx2char
    max_decode_len = (MAX_DECODE_LEN_WORD if is_word_level
                      else MAX_DECODE_LEN_CHAR)
    min_decode_len = (MIN_DECODE_LEN_WORD if is_word_level
                      else MIN_DECODE_LEN_CHAR)

    for batch, is_new_epoch in dataset:
        features = batch['xs']
        frame_counts = batch['x_lens']

        # Decode
        best_hyps, aw, perm_idx = model.decode(
            features, frame_counts,
            beam_width=args.beam_width,
            max_decode_len=max_decode_len,
            min_decode_len=min_decode_len,
            length_penalty=args.length_penalty,
            coverage_penalty=args.coverage_penalty)

        # Reorder the references with the permutation returned by the model
        ys = batch['ys'][perm_idx]
        y_lens = batch['y_lens'][perm_idx]

        for utt in range(len(features)):
            # Reference
            if dataset.is_test:
                # NOTE: transcript is separated by space('_')
                str_ref = ys[utt][0]
            else:
                # Convert from list of index to string
                str_ref = map_fn(ys[utt][:y_lens[utt]])

            tokens = map_fn(best_hyps[utt], return_list=True)
            utt_name = batch['input_names'][utt]
            speaker = utt_name.split('_')[0]

            plot_attention_weights(
                aw[utt][:len(tokens), :frame_counts[utt]],
                label_list=tokens,
                spectrogram=features[utt, :, :dataset.input_freq],
                str_ref=str_ref,
                save_path=mkdir_join(save_path, speaker, utt_name + '.png'),
                figsize=(20, 8))

        if is_new_epoch:
            break
def main(config_path, model_save_path):
    """Build a CTC model from a config file and train it.

    The model directory is derived from the hyperparameters; if a directory
    of that name already holds a finished ('complete.txt') or started
    ('config.yml') run, a numeric suffix is appended. The config file is
    copied into the directory and stdout is redirected to 'train.log' there
    for the duration of training.

    Args:
        config_path (string): path to the config file (.yml)
        model_save_path (string): root directory under which the model
            directory is created

    Raises:
        ValueError: if `params['feature']` is neither 'fbank' nor 'is13'
    """
    # Load a config file (.yml)
    # safe_load: yaml.load() without an explicit Loader is unsafe and is
    # rejected by recent PyYAML releases
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)

    params = config['param']

    # The input dimension depends on the feature type; fail early with a
    # clear message instead of an unbound local later
    input_sizes = {'fbank': 123, 'is13': 141}
    if params['feature'] not in input_sizes:
        raise ValueError('Unsupported feature: %s' % params['feature'])
    input_size = input_sizes[params['feature']]

    # Except for a blank class
    # Label types missing from the table leave the entry untouched, so a
    # value given directly in the config is still honored.
    num_classes = {'original': 3, 'phone3': 3, 'phone4': 4, 'phone43': 43}
    if params['label_type'] in num_classes:
        params['num_classes'] = num_classes[params['label_type']]

    # Model setting
    model = CTC(encoder_type=params['encoder_type'],
                input_size=input_size * params['num_stack'],
                splice=params['splice'],
                num_units=params['num_units'],
                num_layers=params['num_layers'],
                num_classes=params['num_classes'],
                lstm_impl=params['lstm_impl'],
                use_peephole=params['use_peephole'],
                parameter_init=params['weight_init'],
                clip_grad_norm=params['clip_grad_norm'],
                clip_activation=params['clip_activation'],
                num_proj=params['num_proj'],
                weight_decay=params['weight_decay'])

    # Set process name (uses the bare model name, before the suffixes below)
    setproctitle('tf_svc_' + model.name + '_' + params['label_type'])

    # Encode the hyperparameters into the model name
    model.name += '_' + str(params['num_units'])
    model.name += '_' + str(params['num_layers'])
    model.name += '_' + params['optimizer']
    model.name += '_lr' + str(params['learning_rate'])
    if params['num_proj'] != 0:
        model.name += '_proj' + str(params['num_proj'])
    if params['dropout'] != 0:
        model.name += '_drop' + str(params['dropout'])
    if params['num_stack'] != 1:
        model.name += '_stack' + str(params['num_stack'])
    if params['weight_decay'] != 0:
        model.name += '_wd' + str(params['weight_decay'])

    # Set save path
    model.save_path = mkdir_join(
        model_save_path, 'ctc', params['label_type'], model.name)

    # Reset model directory: skip directories that already hold a finished
    # (complete.txt) or an unfinished (config.yml) run
    model_index = 0
    new_model_path = model.save_path
    while (isfile(join(new_model_path, 'complete.txt')) or
           isfile(join(new_model_path, 'config.yml'))):
        model_index += 1
        new_model_path = model.save_path + '_' + str(model_index)
    model.save_path = mkdir(new_model_path)

    # Save config file
    shutil.copyfile(config_path, join(model.save_path, 'config.yml'))

    # Redirect stdout to the training log; always restore it and close the
    # file, even when training fails
    # TODO(hirofumi): change to logger
    stdout_orig = sys.stdout
    with open(join(model.save_path, 'train.log'), 'w') as log_file:
        sys.stdout = log_file
        try:
            do_train(model=model, params=params)
        finally:
            sys.stdout = stdout_orig
def main(config_path, model_save_path):
    """Build a joint CTC-attention model from a config file and train it.

    The model directory is derived from the hyperparameters; if a directory
    of that name already holds a finished ('complete.txt') or started
    ('config.yml') run, a numeric suffix is appended. The config file is
    copied into the directory before training starts.

    Args:
        config_path (string): path to the config file (.yml)
        model_save_path (string): root directory under which the model
            directory is created
    """
    # Load a config file (.yml)
    # safe_load: yaml.load() without an explicit Loader is unsafe and is
    # rejected by recent PyYAML releases
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)

    params = config['param']

    params['sos_index'] = 0
    params['eos_index'] = 1

    # The attention decoder has two more classes than the CTC layer in every
    # supported configuration. Label types missing from the table leave both
    # entries untouched, so values given directly in the config are honored.
    ctc_num_classes = {'phone61': 61, 'phone48': 48,
                       'phone39': 39, 'character': 28}
    if params['label_type'] in ctc_num_classes:
        params['ctc_num_classes'] = ctc_num_classes[params['label_type']]
        params['att_num_classes'] = params['ctc_num_classes'] + 2

    # Model setting
    model = JointCTCAttention(
        input_size=params['input_size'],
        encoder_num_unit=params['encoder_num_unit'],
        encoder_num_layer=params['encoder_num_layer'],
        attention_dim=params['attention_dim'],
        attention_type=params['attention_type'],
        decoder_num_unit=params['decoder_num_unit'],
        decoder_num_layer=params['decoder_num_layer'],
        embedding_dim=params['embedding_dim'],
        att_num_classes=params['att_num_classes'],
        ctc_num_classes=params['ctc_num_classes'],
        att_task_weight=params['att_task_weight'],
        sos_index=params['sos_index'],
        eos_index=params['eos_index'],
        max_decode_length=params['max_decode_length'],
        attention_weights_tempareture=params['attention_weights_tempareture'],
        logits_tempareture=params['logits_tempareture'],
        parameter_init=params['weight_init'],
        clip_grad=params['clip_grad'],
        clip_activation_encoder=params['clip_activation_encoder'],
        clip_activation_decoder=params['clip_activation_decoder'],
        weight_decay=params['weight_decay'])

    # Set process name (uses the model's own name, before it is replaced)
    setproctitle('timit_' + model.name + '_' + params['label_type'])

    # Encode the hyperparameters into the model name
    model.name = params['model']
    model.name += '_encoder' + str(params['encoder_num_unit'])
    model.name += '_' + str(params['encoder_num_layer'])
    model.name += '_attdim' + str(params['attention_dim'])
    model.name += '_decoder' + str(params['decoder_num_unit'])
    model.name += '_' + str(params['decoder_num_layer'])
    model.name += '_' + params['optimizer']
    model.name += '_lr' + str(params['learning_rate'])
    model.name += '_' + params['attention_type']
    if params['attention_weights_tempareture'] != 1:
        model.name += '_sharpening' + \
            str(params['attention_weights_tempareture'])
    if params['weight_decay'] != 0:
        model.name += '_weightdecay' + str(params['weight_decay'])

    # Set save path
    model.save_path = mkdir_join(
        model_save_path, 'attention', params['label_type'], model.name)

    # Reset model directory: skip directories that already hold a finished
    # (complete.txt) or an unfinished (config.yml) run
    model_index = 0
    new_model_path = model.save_path
    while (isfile(join(new_model_path, 'complete.txt')) or
           isfile(join(new_model_path, 'config.yml'))):
        model_index += 1
        new_model_path = model.save_path + '_' + str(model_index)
    model.save_path = mkdir(new_model_path)

    # Save config file
    shutil.copyfile(config_path, join(model.save_path, 'config.yml'))
    # TODO(hirofumi): change to logger

    do_train(model=model, params=params)
def plot(session, posteriors_op, model, dataset, label_type,
         num_stack=1, save_path=None, show=False):
    """Visualize label posteriors of CTC model.

    One figure is drawn per utterance: every non-blank class as a solid
    line and the blank class (the last class) as a dotted grey line.

    Args:
        session: session of training model
        posteriors_op: operation for computing posteriors
        model: the model to evaluate; its first-device placeholders
            (`inputs_pl_list[0]`, ...) are fed
        dataset: An instance of a `Dataset` class, iterated as
            `(data, is_new_epoch)` pairs
        label_type (string): phone39 or phone48 or phone61 or character or
            character_capital_divide (not used in this function)
        num_stack (int): the number of frames to stack
        save_path (string, optional): directory to save ctc outputs.
            If None, nothing is written to disk.
        show (bool, optional): if True, show each figure
    """
    # Clean directory (isdir(None) raises TypeError, hence the None guard)
    if save_path is not None:
        if isdir(save_path):
            shutil.rmtree(save_path)
        mkdir(save_path)

    for data, is_new_epoch in dataset:
        # Create feed dictionary for next mini batch
        inputs, _, inputs_seq_len, input_names = data
        feed_dict = {
            model.inputs_pl_list[0]: inputs,
            model.inputs_seq_len_pl_list[0]: inputs_seq_len,
            # keep_prob of 1.0 disables dropout
            model.keep_prob_pl_list[0]: 1.0
        }

        batch_size, max_frame_num = inputs.shape[:2]
        probs = session.run(posteriors_op, feed_dict=feed_dict)
        probs = probs.reshape(-1, max_frame_num, model.num_classes)

        # Visualize
        for i_batch in range(batch_size):
            # Cut the padding using the length of THIS utterance, so that
            # the posteriors and the time axis have the same length
            frame_num = int(inputs_seq_len[i_batch])
            prob = probs[i_batch][:frame_num, :]

            fig = plt.figure(figsize=(10, 4))
            times_probs = np.arange(frame_num) * num_stack / 100

            # NOTE: Blank class is set to the last class in TensorFlow
            for i in range(0, prob.shape[-1] - 1, 1):
                plt.plot(times_probs, prob[:, i])
            plt.plot(times_probs, prob[:, -1],
                     ':', label='blank', color='grey')
            plt.xlabel('Time [sec]', fontsize=12)
            plt.ylabel('Posteriors', fontsize=12)
            plt.xlim([0, frame_num * num_stack / 100])
            plt.ylim([0.05, 1.05])
            plt.xticks(
                list(range(0, int(frame_num * num_stack / 100) + 1, 1)))
            plt.yticks(list(range(0, 2, 1)))
            plt.legend(loc="upper right", fontsize=12)

            if show:
                plt.show()

            # Save as a png file, one file per utterance
            if save_path is not None:
                plt.savefig(join(save_path, input_names[i_batch] + '.png'),
                            dpi=500)

            # Release the figure so that figures do not pile up in memory
            plt.close(fig)

        if is_new_epoch:
            break
def main(config_path, model_save_path):
    """Build a multitask CTC model from a YAML config and start training.

    Args:
        config_path (string): path to the config file (.yml); its `param`
            section supplies every hyperparameter read below
        model_save_path (string): root directory under which the model
            directory is created
    Raises:
        TypeError: if `label_type_main` or `label_type_sub` is unsupported
        ValueError: if the model directory already holds `complete.txt`
    """
    # Read a config file (.yml)
    with open(config_path, "r") as f:
        # safe_load: `yaml.load` without an explicit Loader is rejected by
        # PyYAML >= 6 and can build arbitrary objects on older versions.
        config = yaml.safe_load(f)

    params = config['param']

    # Except for a blank label
    if params['label_type_main'] == 'kanji':
        params['num_classes_main'] = 3386
    elif params['label_type_main'] == 'kana':
        params['num_classes_main'] = 147
    else:
        raise TypeError(
            'Unsupported label_type_main: ' + str(params['label_type_main']))

    if params['label_type_sub'] == 'kana':
        params['num_classes_sub'] = 147
    elif params['label_type_sub'] == 'phone':
        params['num_classes_sub'] = 38
    else:
        # The original had a bare `TypeError` expression here (no `raise`),
        # so an unsupported sub label type was silently accepted.
        raise TypeError(
            'Unsupported label_type_sub: ' + str(params['label_type_sub']))

    # Model setting
    model = load(model_type=params['model'])
    model = model(batch_size=params['batch_size'],
                  input_size=params['input_size'],
                  splice=params['splice'],
                  num_stack=params['num_stack'],
                  num_units=params['num_units'],
                  num_layer_main=params['num_layer_main'],
                  num_layer_sub=params['num_layer_sub'],
                  # bottleneck_dim is only reflected in the model name below
                  num_classes_main=params['num_classes_main'],
                  num_classes_sub=params['num_classes_sub'],
                  main_task_weight=params['main_task_weight'],
                  parameter_init=params['weight_init'],
                  clip_grad_norm=params['clip_grad_norm'],
                  clip_activation=params['clip_activation'],
                  num_proj=params['num_proj'],
                  weight_decay=params['weight_decay'])

    # The model name encodes the hyperparameters and becomes the name of
    # the save directory, so the suffix strings are kept exactly as they were.
    model.model_name = params['model']
    model.model_name += '_' + str(params['num_units'])
    model.model_name += '_main' + str(params['num_layer_main'])
    model.model_name += '_sub' + str(params['num_layer_sub'])
    model.model_name += '_' + params['optimizer']
    model.model_name += '_lr' + str(params['learning_rate'])
    if params['bottleneck_dim'] != 0:
        model.model_name += '_bottoleneck' + str(params['bottleneck_dim'])
    if params['num_proj'] != 0:
        model.model_name += '_proj' + str(params['num_proj'])
    if params['num_stack'] != 1:
        model.model_name += '_stack' + str(params['num_stack'])
    if params['weight_decay'] != 0:
        model.model_name += '_weightdecay' + str(params['weight_decay'])
    model.model_name += '_taskweight' + str(params['main_task_weight'])
    if params['train_data_size'] == 'large':
        model.model_name += '_large'

    # Set save path
    model.save_path = mkdir(model_save_path)
    model.save_path = mkdir_join(model.save_path, 'ctc')
    model.save_path = mkdir_join(
        model.save_path,
        params['label_type_main'] + '_' + params['label_type_sub'])
    model.save_path = mkdir_join(model.save_path, model.model_name)

    # Reset model directory: an unfinished run is wiped, a finished one
    # (marked by complete.txt) is never overwritten.
    if not isfile(join(model.save_path, 'complete.txt')):
        tf.gfile.DeleteRecursively(model.save_path)
        tf.gfile.MakeDirs(model.save_path)
    else:
        raise ValueError('File exists.')

    # Set process name
    setproctitle('csj_multictc_' + params['label_type_main'] + '_' +
                 params['label_type_sub'] + '_' + params['train_data_size'])

    # Save config file
    shutil.copyfile(config_path, join(model.save_path, 'config.yml'))

    # Everything printed from here on goes to train.log
    sys.stdout = open(join(model.save_path, 'train.log'), 'w')
    do_train(model=model, params=params)
def main(config_path, model_save_path):
    """Build a joint CTC-attention model from a YAML config and train it.

    Args:
        config_path (string): path to the config file (.yml); its `param`
            section supplies every hyperparameter read below
        model_save_path (string): root directory under which the model
            directory is created
    Raises:
        TypeError: if `label_type` is not one of the supported types
    """
    # Load a config file (.yml)
    with open(config_path, "r") as f:
        # safe_load: `yaml.load` without an explicit Loader is rejected by
        # PyYAML >= 6 and can build arbitrary objects on older versions.
        config = yaml.safe_load(f)

    params = config['param']

    # Except for a <SOS> and <EOS> class
    num_classes_by_label = {
        'phone61': 61,
        'phone48': 48,
        'phone39': 39,
        'character': 28,
        'character_capital_divide': 72,
    }
    if params['label_type'] not in num_classes_by_label:
        raise TypeError(
            'Unsupported label_type: ' + str(params['label_type']))
    params['num_classes'] = num_classes_by_label[params['label_type']]

    # Model setting
    model = JointCTCAttention(
        input_size=params['input_size'] * params['num_stack'],
        encoder_type=params['encoder_type'],
        encoder_num_units=params['encoder_num_units'],
        encoder_num_layers=params['encoder_num_layers'],
        encoder_num_proj=params['encoder_num_proj'],
        attention_type=params['attention_type'],
        attention_dim=params['attention_dim'],
        decoder_type=params['decoder_type'],
        decoder_num_units=params['decoder_num_units'],
        decoder_num_layers=params['decoder_num_layers'],
        embedding_dim=params['embedding_dim'],
        lambda_weight=params['lambda_weight'],
        num_classes=params['num_classes'],
        # <SOS> and <EOS> take the two indices right after the labels
        sos_index=params['num_classes'],
        eos_index=params['num_classes'] + 1,
        max_decode_length=params['max_decode_length'],
        lstm_impl='LSTMBlockCell',
        use_peephole=params['use_peephole'],
        parameter_init=params['weight_init'],
        clip_grad_norm=params['clip_grad_norm'],
        clip_activation_encoder=params['clip_activation_encoder'],
        clip_activation_decoder=params['clip_activation_decoder'],
        weight_decay=params['weight_decay'],
        time_major=True,
        sharpening_factor=params['sharpening_factor'],
        logits_temperature=params['logits_temperature'])

    # Set process name (uses the model's base name, before the suffixes)
    setproctitle('tf_timit_' + model.name + '_' +
                 params['label_type'] + '_' + params['attention_type'])

    # The name encodes the hyperparameters and becomes the directory name
    model.name += '_en' + str(params['encoder_num_units'])
    model.name += '_' + str(params['encoder_num_layers'])
    model.name += '_att' + str(params['attention_dim'])
    model.name += '_de' + str(params['decoder_num_units'])
    model.name += '_' + str(params['decoder_num_layers'])
    model.name += '_' + params['optimizer']
    model.name += '_lr' + str(params['learning_rate'])
    model.name += '_' + params['attention_type']
    if params['dropout_encoder'] != 0:
        model.name += '_dropen' + str(params['dropout_encoder'])
    if params['dropout_decoder'] != 0:
        model.name += '_dropde' + str(params['dropout_decoder'])
    if params['dropout_embedding'] != 0:
        model.name += '_dropem' + str(params['dropout_embedding'])
    if params['num_stack'] != 1:
        model.name += '_stack' + str(params['num_stack'])
    if params['weight_decay'] != 0:
        # NOTE(review): no leading underscore, unlike the other suffixes;
        # left as is because it determines the save directory name.
        model.name += 'wd' + str(params['weight_decay'])
    if params['sharpening_factor'] != 1:
        model.name += '_sharp' + str(params['sharpening_factor'])
    if params['logits_temperature'] != 1:
        model.name += '_temp' + str(params['logits_temperature'])
    model.name += '_lambda' + str(params['lambda_weight'])

    # Set save path
    model.save_path = mkdir_join(
        model_save_path, 'joint_ctc_attention', params['label_type'],
        model.name)

    # Reset model directory: never reuse a directory that holds a finished
    # run (complete.txt) or a started one (config.yml); append _1, _2, ...
    # until a free name is found.
    model_index = 0
    new_model_path = model.save_path
    while (isfile(join(new_model_path, 'complete.txt')) or
           isfile(join(new_model_path, 'config.yml'))):
        model_index += 1
        new_model_path = model.save_path + '_' + str(model_index)
    model.save_path = mkdir(new_model_path)

    # Save config file
    shutil.copyfile(config_path, join(model.save_path, 'config.yml'))

    # Everything printed from here on goes to train.log
    sys.stdout = open(join(model.save_path, 'train.log'), 'w')
    # TODO(hirofumi): change to logger
    do_train(model=model, params=params)
def plot(session, decode_op, attention_weights_op, model, dataset,
         label_type, is_test=False, save_path=None, show=False):
    """Visualize attention weights of attention-based model.

    Args:
        session: session of training model
        decode_op: operation for decoding
        attention_weights_op: operation for computing attention weights
        model: model to evaluate
        dataset: An instance of a `Dataset` class
        label_type (string): phone39 or phone48 or phone61 or character or
            character_capital_divide
        is_test (bool, optional): not used in this function
        save_path (string, optional): path to save attention weights
            plotting. If None, nothing is written to disk.
        show (bool, optional): if True, show each figure
    Raises:
        NotImplementedError: if `label_type` does not contain 'phone'
    """
    # Clean directory
    if save_path is not None:
        if isdir(save_path):
            shutil.rmtree(save_path)
        # `mkdir` is a project helper; presumably it (re)creates the directory
        mkdir(save_path)

    if label_type == 'character':
        map_fn = Idx2char(
            map_file_path='../metrics/mapping_files/character.txt')
    elif label_type == 'character_capital_divide':
        map_fn = Idx2char(
            map_file_path=(
                '../metrics/mapping_files/character_capital_divide.txt'),
            capital_divide=True)
    else:
        map_fn = Idx2phone(map_file_path='../metrics/mapping_files/' +
                           label_type + '.txt')

    for data, is_new_epoch in dataset:

        # Create feed dictionary for next mini batch
        inputs, _, inputs_seq_len, _, input_names = data
        # NOTE(review): only element 0 of `inputs` / `inputs_seq_len` is fed
        # while the batch size below is read from `inputs.shape`; confirm the
        # layout of `data` against the Dataset class.
        feed_dict = {
            model.inputs_pl_list[0]: inputs[0],
            model.inputs_seq_len_pl_list[0]: inputs_seq_len[0],
            model.keep_prob_encoder_pl_list[0]: 1.0,
            model.keep_prob_decoder_pl_list[0]: 1.0,
            model.keep_prob_embedding_pl_list[0]: 1.0
        }

        batch_size, _ = inputs.shape[:2]
        attention_weights, labels_pred = session.run(
            [attention_weights_op, decode_op], feed_dict=feed_dict)

        # Visualize
        for i_batch in range(batch_size):

            # Convert from index to label
            str_pred = map_fn(labels_pred[i_batch])
            if 'phone' in label_type:
                label_list = str_pred.split(' ')
            else:
                raise NotImplementedError

            fig = plt.figure(figsize=(10, 4))
            sns.heatmap(attention_weights[i_batch],
                        cmap='Blues',
                        xticklabels=False,
                        yticklabels=label_list)

            plt.xlabel('Input frames', fontsize=12)
            plt.ylabel('Output labels (top to bottom)', fontsize=12)

            if show:
                plt.show()

            # Save as a png file
            if save_path is not None:
                # NOTE(review): every utterance of the batch is written to
                # the name at index 0; kept because the layout of
                # `input_names` cannot be confirmed from here.
                plt.savefig(join(save_path, input_names[0] + '.png'),
                            dpi=500)

            # Release the figure; otherwise one figure per utterance stays
            # alive for the whole run.
            plt.close(fig)

        if is_new_epoch:
            break
def plot(session, posteriors_op, model, dataset, label_type,
         num_stack=1, save_path=None, show=False):
    """Visualize label posteriors of CTC model.

    One figure is drawn per utterance: every non-blank class as a solid
    line and the blank class (last index) as a dotted grey line.

    Args:
        session: session of training model
        posteriors_op: operation for computing posteriors
        model: the model to evaluate
        dataset: An instance of a `Dataset` class
        label_type (string): phone39 or phone48 or phone61 or character or
            character_capital_divide (not used in this function)
        num_stack (int): the number of frames to stack
        save_path (string, optional): path to save ctc outputs. If None,
            nothing is written to disk.
        show (bool, optional): if True, show each figure
    """
    # Clean directory.  `save_path` defaults to None, so it must be checked
    # before it is handed to `isdir`.
    if save_path is not None:
        if isdir(save_path):
            shutil.rmtree(save_path)
        # `mkdir` is a project helper; presumably it (re)creates the directory
        mkdir(save_path)

    for data, is_new_epoch in dataset:

        # Create feed dictionary for next mini batch
        inputs, _, inputs_seq_len, input_names = data
        feed_dict = {
            model.inputs_pl_list[0]: inputs,
            model.inputs_seq_len_pl_list[0]: inputs_seq_len,
            model.keep_prob_pl_list[0]: 1.0
        }

        batch_size, max_frame_num = inputs.shape[:2]
        probs = session.run(posteriors_op, feed_dict=feed_dict)
        probs = probs.reshape(-1, max_frame_num, model.num_classes)

        # Visualize
        for i_batch in range(batch_size):
            # Use this utterance's own length so that the posterior slice
            # and the time axis always have the same number of points.
            frame_num = int(inputs_seq_len[i_batch])
            prob = probs[i_batch][:frame_num, :]
            times_probs = np.arange(frame_num) * num_stack / 100

            fig = plt.figure(figsize=(10, 4))

            # NOTE: Blank class is set to the last class in TensorFlow
            for i in range(0, prob.shape[-1] - 1, 1):
                plt.plot(times_probs, prob[:, i])
            plt.plot(times_probs, prob[:, -1],
                     ':', label='blank', color='grey')
            plt.xlabel('Time [sec]', fontsize=12)
            plt.ylabel('Posteriors', fontsize=12)
            plt.xlim([0, frame_num * num_stack / 100])
            # NOTE(review): the lower limit 0.05 hides the 0 tick set below;
            # possibly meant to be -0.05 -- confirm before changing.
            plt.ylim([0.05, 1.05])
            plt.xticks(list(range(
                0, int(frame_num * num_stack / 100) + 1, 1)))
            plt.yticks(list(range(0, 2, 1)))
            plt.legend(loc="upper right", fontsize=12)

            if show:
                plt.show()

            # Save as a png file (one file per utterance)
            if save_path is not None:
                plt.savefig(join(save_path, input_names[i_batch] + '.png'),
                            dpi=500)

            # Release the figure; otherwise one figure per utterance stays
            # alive for the whole run.
            plt.close(fig)

        if is_new_epoch:
            break
def main(config_path, model_save_path, gpu_indices):
    """Build a CTC model for CSJ from a YAML config and start training.

    Args:
        config_path (string): path to the config file (.yml); its `param`
            section supplies every hyperparameter read below
        model_save_path (string): root directory under which the model
            directory is created
        gpu_indices (list): GPU indices; only its length is used here, the
            list itself is forwarded to `do_train`
    Raises:
        TypeError: if `label_type` is unsupported, or if no number of
            classes could be determined for the given `train_data_size`
    """
    # Load a config file (.yml)
    with open(config_path, "r") as f:
        # safe_load: `yaml.load` without an explicit Loader is rejected by
        # PyYAML >= 6 and can build arbitrary objects on older versions.
        config = yaml.safe_load(f)

    params = config['param']

    # Except for a blank label
    if params['label_type'] == 'kana':
        params['num_classes'] = 146
    elif params['label_type'] == 'kana_divide':
        params['num_classes'] = 147
    elif params['label_type'] == 'kanji':
        if params['train_data_size'] == 'train_subset':
            params['num_classes'] = 2981
        elif params['train_data_size'] == 'train_fullset':
            params['num_classes'] = 3385
    elif params['label_type'] == 'kanji_divide':
        if params['train_data_size'] == 'train_subset':
            params['num_classes'] = 2982
        elif params['train_data_size'] == 'train_fullset':
            params['num_classes'] = 3386
    else:
        raise TypeError(
            'Unsupported label_type: ' + str(params['label_type']))

    # A kanji label type with an unknown data size sets nothing above; fail
    # here with a clear message instead of a KeyError at model construction.
    # A value supplied by the config itself is still accepted.
    if 'num_classes' not in params:
        raise TypeError(
            'Unsupported train_data_size: ' + str(params['train_data_size']))

    # Model setting
    model = CTC(encoder_type=params['encoder_type'],
                input_size=params['input_size'],
                splice=params['splice'],
                num_stack=params['num_stack'],
                num_units=params['num_units'],
                num_layers=params['num_layers'],
                num_classes=params['num_classes'],
                lstm_impl=params['lstm_impl'],
                use_peephole=params['use_peephole'],
                parameter_init=params['weight_init'],
                clip_grad_norm=params['clip_grad_norm'],
                clip_activation=params['clip_activation'],
                num_proj=params['num_proj'],
                weight_decay=params['weight_decay'])

    # Set process name (uses the model's base name, before the suffixes)
    setproctitle(
        'tf_csj_' + model.name + '_' + params['train_data_size'] + '_' +
        params['label_type'])

    # The name encodes the hyperparameters and becomes the directory name
    model.name += '_' + str(params['num_units'])
    model.name += '_' + str(params['num_layers'])
    model.name += '_' + params['optimizer']
    model.name += '_lr' + str(params['learning_rate'])
    if params['num_proj'] != 0:
        model.name += '_proj' + str(params['num_proj'])
    if params['dropout'] != 0:
        model.name += '_drop' + str(params['dropout'])
    if params['num_stack'] != 1:
        model.name += '_stack' + str(params['num_stack'])
    if params['weight_decay'] != 0:
        model.name += '_wd' + str(params['weight_decay'])
    if params['bottleneck_dim'] != 0:
        model.name += '_bottle' + str(params['bottleneck_dim'])
    if len(gpu_indices) >= 2:
        model.name += '_gpu' + str(len(gpu_indices))

    # Set save path
    model.save_path = mkdir_join(
        model_save_path, 'ctc', params['label_type'],
        params['train_data_size'], model.name)

    # Reset model directory: never reuse a directory that holds a finished
    # run (complete.txt) or a started one (config.yml); append _1, _2, ...
    # until a free name is found.
    model_index = 0
    new_model_path = model.save_path
    while (isfile(join(new_model_path, 'complete.txt')) or
           isfile(join(new_model_path, 'config.yml'))):
        model_index += 1
        new_model_path = model.save_path + '_' + str(model_index)
    model.save_path = mkdir(new_model_path)

    # Save config file
    shutil.copyfile(config_path, join(model.save_path, 'config.yml'))

    # Everything printed from here on goes to train.log
    sys.stdout = open(join(model.save_path, 'train.log'), 'w')
    # TODO(hirofumi): change to logger
    do_train(model=model, params=params, gpu_indices=gpu_indices)
def main(config_path):
    """Build a BLSTM attention model from a YAML config and train it.

    Args:
        config_path (string): path to the config file (.yml); its `corpus`,
            `feature` and `param` sections are read below
    Raises:
        TypeError: if `label_type` is not one of the supported types
        ValueError: if the model directory already holds `complete.txt`
    """
    # Load a config file (.yml)
    with open(config_path, "r") as f:
        # safe_load: `yaml.load` without an explicit Loader is rejected by
        # PyYAML >= 6 and can build arbitrary objects on older versions.
        config = yaml.safe_load(f)

    corpus = config['corpus']
    feature = config['feature']
    param = config['param']

    # The last two output indices are used as <SOS> and <EOS> below
    if corpus['label_type'] == 'phone61':
        output_size = 63
    elif corpus['label_type'] == 'phone48':
        output_size = 50
    elif corpus['label_type'] == 'phone39':
        output_size = 41
    elif corpus['label_type'] == 'character':
        output_size = 33
    else:
        # Without this branch `output_size` stayed unbound and the failure
        # only surfaced later as an UnboundLocalError.
        raise TypeError(
            'Unsupported label_type: ' + str(corpus['label_type']))

    # Model setting
    network = blstm_attention_seq2seq.BLSTMAttetion(
        batch_size=param['batch_size'],
        input_size=feature['input_size'],
        encoder_num_unit=param['encoder_num_unit'],
        encoder_num_layer=param['encoder_num_layer'],
        attention_dim=param['attention_dim'],
        decoder_num_unit=param['decoder_num_unit'],
        decoder_num_layer=param['decoder_num_layer'],
        embedding_dim=param['embedding_dim'],
        output_size=output_size,
        sos_index=output_size - 2,
        eos_index=output_size - 1,
        max_decode_length=param['max_decode_length'],
        attention_weights_tempareture=param['attention_weights_tempareture'],
        logits_tempareture=param['logits_tempareture'],
        parameter_init=param['weight_init'],
        clip_grad=param['clip_grad'],
        clip_activation_encoder=param['clip_activation_encoder'],
        clip_activation_decoder=param['clip_activation_decoder'],
        dropout_ratio_input=param['dropout_input'],
        dropout_ratio_hidden=param['dropout_hidden'],
        weight_decay=param['weight_decay'])

    # The name encodes the hyperparameters and becomes the directory name
    network.model_name = config['model_name'].upper()
    network.model_name += '_encoder' + str(param['encoder_num_unit'])
    network.model_name += '_' + str(param['encoder_num_layer'])
    network.model_name += '_attdim' + str(param['attention_dim'])
    network.model_name += '_decoder' + str(param['decoder_num_unit'])
    network.model_name += '_' + str(param['decoder_num_layer'])
    network.model_name += '_' + param['optimizer']
    network.model_name += '_lr' + str(param['learning_rate'])
    if param['weight_decay'] != 0:
        network.model_name += '_weightdecay' + str(param['weight_decay'])

    # Set save path
    network.model_dir = mkdir('/n/sd8/inaguma/result/timit/attention/')
    network.model_dir = mkdir_join(network.model_dir, corpus['label_type'])
    network.model_dir = mkdir_join(network.model_dir, network.model_name)

    # Reset model directory: an unfinished run is wiped, a finished one
    # (marked by complete.txt) is never overwritten.
    if not isfile(join(network.model_dir, 'complete.txt')):
        tf.gfile.DeleteRecursively(network.model_dir)
        tf.gfile.MakeDirs(network.model_dir)
    else:
        raise ValueError('File exists.')

    # Set process name
    setproctitle('attention_timit_' + corpus['label_type'])

    # Save config file
    shutil.copyfile(config_path, join(network.model_dir, 'config.yml'))

    # Redirect stdout to train.log for the duration of training.  The
    # try/finally restores stdout and the `with` closes the log file even
    # when training raises.
    with open(join(network.model_dir, 'train.log'), 'w') as log_file:
        sys.stdout = log_file
        try:
            print(network.model_name)
            do_train(network=network,
                     optimizer=param['optimizer'],
                     learning_rate=param['learning_rate'],
                     batch_size=param['batch_size'],
                     epoch_num=param['num_epoch'],
                     label_type=corpus['label_type'],
                     eos_index=output_size - 1)
        finally:
            sys.stdout = sys.__stdout__
def main(config_path, model_save_path, gpu_indices):
    """Build a student CTC model from a YAML config and start training.

    Args:
        config_path (string): path to the config file (.yml); its `param`
            section supplies every hyperparameter read below
        model_save_path (string): root directory under which the model
            directory is created
        gpu_indices (list): GPU indices; only its length is used here, the
            list itself is forwarded to `do_train`
    """
    # Load a config file (.yml)
    with open(config_path, "r") as f:
        # safe_load: `yaml.load` without an explicit Loader is rejected by
        # PyYAML >= 6 and can build arbitrary objects on older versions.
        config = yaml.safe_load(f)

    params = config['param']

    # Except for a blank class
    params['num_classes'] = 28

    # Model setting
    model = StudentCTC(
        encoder_type=params['encoder_type'],
        input_size=(params['input_size'] * params['num_stack'] *
                    params['splice']),
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_classes=params['num_classes'],
        parameter_init=params['weight_init'],
        clip_grad_norm=params['clip_grad_norm'],
        weight_decay=params['weight_decay'])

    # Set process name (uses the model's base name, before the suffixes)
    setproctitle(
        'tf_libri_' + model.name + '_' + params['train_data_size'] + '_' +
        params['label_type'])

    # The name encodes the hyperparameters and becomes the directory name
    model.name += '_' + params['optimizer']
    model.name += '_lr' + str(params['learning_rate'])
    if params['dropout'] != 0:
        model.name += '_drop' + str(params['dropout'])
    if params['num_stack'] != 1:
        model.name += '_stack' + str(params['num_stack'])
    if params['weight_decay'] != 0:
        model.name += '_wd' + str(params['weight_decay'])
    if len(gpu_indices) >= 2:
        model.name += '_gpu' + str(len(gpu_indices))

    # Set save path
    model.save_path = mkdir_join(
        model_save_path, 'student_ctc', params['label_type'],
        params['train_data_size'], model.name)

    # Reset model directory: never reuse a directory that holds a finished
    # run (complete.txt) or a started one (config.yml); append _1, _2, ...
    # until a free name is found.
    model_index = 0
    new_model_path = model.save_path
    while (isfile(join(new_model_path, 'complete.txt')) or
           isfile(join(new_model_path, 'config.yml'))):
        model_index += 1
        new_model_path = model.save_path + '_' + str(model_index)
    model.save_path = mkdir(new_model_path)

    # Save config file
    shutil.copyfile(config_path, join(model.save_path, 'config.yml'))

    # Everything printed from here on goes to train.log
    sys.stdout = open(join(model.save_path, 'train.log'), 'w')
    # TODO(hirofumi): change to logger
    do_train(model=model, params=params, gpu_indices=gpu_indices)
def plot(model, dataset, beam_width, beam_width_sub,
         eval_batch_size=None, save_path=None):
    """Plot the attention weights of a hierarchical attention-based model.

    For every utterance three files are written below
    `save_path/<speaker>/`: the word/character-to-acoustic attention plot,
    the word-to-character attention plot, and a text file holding the first
    element of the reference entry in `batch['ys']`.

    Args:
        model: model to evaluate
        dataset: An instance of a `Dataset` class
        beam_width: (int): the size of beam in the main task
        beam_width_sub: (int): the size of beam in the sub task
        eval_batch_size (int, optional): the batch size when evaluating the
            model (not used in this function)
        save_path (string, optional): path to save attention weights plotting
    """
    # Start from an empty output directory
    if save_path is not None and isdir(save_path):
        shutil.rmtree(save_path)
        mkdir(save_path)

    to_words = Idx2word(dataset.vocab_file_path, return_list=True)
    to_chars = Idx2char(dataset.vocab_file_path_sub, return_list=True)

    for batch, is_new_epoch in dataset:

        # Decode both tasks and collect their attention weights
        decoded = model.attention_weights(
            batch['xs'], batch['x_lens'],
            beam_width=beam_width,
            beam_width_sub=beam_width_sub,
            max_decode_len=MAX_DECODE_LEN_WORD,
            max_decode_len_sub=MAX_DECODE_LEN_CHAR)
        best_hyps, best_hyps_sub, aw, aw_sub, aw_dec = decoded

        for b in range(len(batch['xs'])):
            word_list = to_words(best_hyps[b])
            char_list = to_chars(best_hyps_sub[b])
            num_words = len(word_list)
            num_chars = len(char_list)

            # The first two '_'-separated fields of the utterance name are
            # used as the per-speaker sub-directory
            utt_name = batch['input_names'][b]
            speaker = '_'.join(utt_name.split('_')[:2])
            num_frames = batch['x_lens'][b]

            # word to acoustic & character to acoustic
            plot_hierarchical_attention_weights(
                aw[b][:num_words, :num_frames],
                aw_sub[b][:num_chars, :num_frames],
                label_list=word_list,
                label_list_sub=char_list,
                spectrogram=batch['xs'][b, :, :dataset.input_freq],
                save_path=mkdir_join(save_path, speaker, utt_name + '.png'),
                figsize=(50, 10))

            # word to character attention
            plot_word2char_attention_weights(
                aw_dec[b][:num_words, :num_chars],
                label_list=word_list,
                label_list_sub=char_list,
                save_path=mkdir_join(
                    save_path, speaker, utt_name + '_word2char.png'),
                figsize=(50, 10))

            # Reference transcript next to the figures
            ref_path = join(save_path, speaker, utt_name + '.txt')
            with open(ref_path, 'w') as f:
                f.write(batch['ys'][b][0])

        if is_new_epoch:
            break
def main(config_path, model_save_path):
    """Build an attention seq2seq model from a YAML config and train it.

    Args:
        config_path (string): path to the config file (.yml); its `param`
            section supplies every hyperparameter read below
        model_save_path (string): root directory under which the model
            directory is created
    Raises:
        TypeError: if `label_type` is not one of the supported types
    """
    # Load a config file (.yml)
    with open(config_path, "r") as f:
        # safe_load: `yaml.load` without an explicit Loader is rejected by
        # PyYAML >= 6 and can build arbitrary objects on older versions.
        config = yaml.safe_load(f)

    params = config['param']

    # Except for a <SOS> and <EOS> class
    num_classes_by_label = {
        'phone61': 61,
        'phone48': 48,
        'phone39': 39,
        'character': 28,
        'character_capital_divide': 72,
    }
    if params['label_type'] not in num_classes_by_label:
        raise TypeError(
            'Unsupported label_type: ' + str(params['label_type']))
    params['num_classes'] = num_classes_by_label[params['label_type']]

    # Model setting
    model = AttentionSeq2Seq(
        input_size=params['input_size'] * params['num_stack'],
        encoder_type=params['encoder_type'],
        encoder_num_units=params['encoder_num_units'],
        encoder_num_layers=params['encoder_num_layers'],
        encoder_num_proj=params['encoder_num_proj'],
        attention_type=params['attention_type'],
        attention_dim=params['attention_dim'],
        decoder_type=params['decoder_type'],
        decoder_num_units=params['decoder_num_units'],
        decoder_num_layers=params['decoder_num_layers'],
        embedding_dim=params['embedding_dim'],
        num_classes=params['num_classes'],
        # <SOS> and <EOS> take the two indices right after the labels
        sos_index=params['num_classes'],
        eos_index=params['num_classes'] + 1,
        max_decode_length=params['max_decode_length'],
        lstm_impl='LSTMBlockCell',
        use_peephole=params['use_peephole'],
        parameter_init=params['weight_init'],
        clip_grad_norm=params['clip_grad_norm'],
        clip_activation_encoder=params['clip_activation_encoder'],
        clip_activation_decoder=params['clip_activation_decoder'],
        weight_decay=params['weight_decay'],
        time_major=True,
        sharpening_factor=params['sharpening_factor'],
        logits_temperature=params['logits_temperature'],
        sigmoid_smoothing=params['sigmoid_smoothing'])

    # Set process name (uses the model's base name, before it is replaced)
    setproctitle('tf_timit_' + model.name + '_' +
                 params['label_type'] + '_' + params['attention_type'])

    # The name is rebuilt from scratch here (plain assignment, not +=); it
    # encodes the hyperparameters and becomes the directory name.
    model.name = 'en' + str(params['encoder_num_units'])
    model.name += '_' + str(params['encoder_num_layers'])
    model.name += '_att' + str(params['attention_dim'])
    model.name += '_de' + str(params['decoder_num_units'])
    model.name += '_' + str(params['decoder_num_layers'])
    model.name += '_' + params['optimizer']
    model.name += '_lr' + str(params['learning_rate'])
    model.name += '_' + params['attention_type']
    if params['dropout_encoder'] != 0:
        model.name += '_dropen' + str(params['dropout_encoder'])
    if params['dropout_decoder'] != 0:
        model.name += '_dropde' + str(params['dropout_decoder'])
    if params['dropout_embedding'] != 0:
        model.name += '_dropem' + str(params['dropout_embedding'])
    if params['num_stack'] != 1:
        model.name += '_stack' + str(params['num_stack'])
    if params['weight_decay'] != 0:
        # NOTE(review): no leading underscore, unlike the other suffixes;
        # left as is because it determines the save directory name.
        model.name += 'wd' + str(params['weight_decay'])
    if params['sharpening_factor'] != 1:
        model.name += '_sharp' + str(params['sharpening_factor'])
    if params['logits_temperature'] != 1:
        model.name += '_temp' + str(params['logits_temperature'])
    if bool(params['sigmoid_smoothing']):
        model.name += '_smoothing'

    # Set save path
    model.save_path = mkdir_join(model_save_path, 'attention',
                                 params['label_type'], model.name)

    # Reset model directory: never reuse a directory that holds a finished
    # run (complete.txt) or a started one (config.yml); append _1, _2, ...
    # until a free name is found.
    model_index = 0
    new_model_path = model.save_path
    while (isfile(join(new_model_path, 'complete.txt')) or
           isfile(join(new_model_path, 'config.yml'))):
        model_index += 1
        new_model_path = model.save_path + '_' + str(model_index)
    model.save_path = mkdir(new_model_path)

    # Save config file
    shutil.copyfile(config_path, join(model.save_path, 'config.yml'))

    # Everything printed from here on goes to train.log
    sys.stdout = open(join(model.save_path, 'train.log'), 'w')
    # TODO(hirofumi): change to logger
    do_train(model=model, params=params)
def main(config_path, model_save_path):
    """Build a multitask CTC model for TIMIT from a YAML config and train it.

    Args:
        config_path (string): path to the config file (.yml); its `param`
            section supplies every hyperparameter read below
        model_save_path (string): root directory under which the model
            directory is created
    Raises:
        TypeError: if `label_type_main` or `label_type_sub` is unsupported
    """
    # Load a config file (.yml)
    with open(config_path, "r") as f:
        # safe_load: `yaml.load` without an explicit Loader is rejected by
        # PyYAML >= 6 and can build arbitrary objects on older versions.
        config = yaml.safe_load(f)

    params = config['param']

    # Except for a blank class
    num_classes_main = {
        'character': 28,
        'character_capital_divide': 72,
    }
    num_classes_sub = {
        'phone61': 61,
        'phone48': 48,
        'phone39': 39,
    }
    if params['label_type_main'] not in num_classes_main:
        raise TypeError(
            'Unsupported label_type_main: ' + str(params['label_type_main']))
    params['num_classes_main'] = num_classes_main[params['label_type_main']]

    if params['label_type_sub'] not in num_classes_sub:
        raise TypeError(
            'Unsupported label_type_sub: ' + str(params['label_type_sub']))
    params['num_classes_sub'] = num_classes_sub[params['label_type_sub']]

    # Model setting
    model = MultitaskCTC(encoder_type=params['encoder_type'],
                         input_size=params['input_size'],
                         splice=params['splice'],
                         num_stack=params['num_stack'],
                         num_units=params['num_units'],
                         num_layers_main=params['num_layers_main'],
                         num_layers_sub=params['num_layers_sub'],
                         num_classes_main=params['num_classes_main'],
                         num_classes_sub=params['num_classes_sub'],
                         main_task_weight=params['main_task_weight'],
                         lstm_impl=params['lstm_impl'],
                         use_peephole=params['use_peephole'],
                         parameter_init=params['weight_init'],
                         clip_grad_norm=params['clip_grad_norm'],
                         clip_activation=params['clip_activation'],
                         num_proj=params['num_proj'],
                         weight_decay=params['weight_decay'])

    # Set process name (uses the model's base name, before the suffixes)
    setproctitle('tf_timit_' + model.name + '_' +
                 params['label_type_main'] + '_' + params['label_type_sub'])

    # The name encodes the hyperparameters and becomes the directory name
    model.name += '_' + str(params['num_units'])
    model.name += '_main' + str(params['num_layers_main'])
    model.name += '_sub' + str(params['num_layers_sub'])
    model.name += '_' + params['optimizer']
    model.name += '_lr' + str(params['learning_rate'])
    if params['num_proj'] != 0:
        model.name += '_proj' + str(params['num_proj'])
    if params['dropout'] != 0:
        model.name += '_drop' + str(params['dropout'])
    if params['num_stack'] != 1:
        model.name += '_stack' + str(params['num_stack'])
    if params['weight_decay'] != 0:
        model.name += '_wd' + str(params['weight_decay'])
    model.name += '_main' + str(params['main_task_weight'])

    # Set save path
    model.save_path = mkdir_join(
        model_save_path, 'ctc', 'char_' + params['label_type_sub'],
        model.name)

    # Reset model directory: never reuse a directory that holds a finished
    # run (complete.txt) or a started one (config.yml); append _1, _2, ...
    # until a free name is found.
    model_index = 0
    new_model_path = model.save_path
    while (isfile(join(new_model_path, 'complete.txt')) or
           isfile(join(new_model_path, 'config.yml'))):
        model_index += 1
        new_model_path = model.save_path + '_' + str(model_index)
    model.save_path = mkdir(new_model_path)

    # Save config file
    shutil.copyfile(config_path, join(model.save_path, 'config.yml'))

    # Everything printed from here on goes to train.log
    sys.stdout = open(join(model.save_path, 'train.log'), 'w')
    # TODO(hirofumi): change to logger
    do_train(model=model, params=params)
def main():
    """Plot the CTC posteriors of a saved model on the test set.

    Reads the command line through the module-level `parser`, restores the
    model stored in `args.model_path` and writes one figure per utterance
    to `<model_path>/ctc_probs/`.
    """
    args = parser.parse_args()

    # Load a config file (.yml)
    params = load_config(join(args.model_path, 'config.yml'), is_eval=True)

    # Load dataset
    dataset = Dataset(data_save_path=args.data_save_path,
                      backend=params['backend'],
                      input_freq=params['input_freq'],
                      use_delta=params['use_delta'],
                      use_double_delta=params['use_double_delta'],
                      data_type='test',
                      label_type=params['label_type'],
                      batch_size=args.eval_batch_size,
                      splice=params['splice'],
                      num_stack=params['num_stack'],
                      num_skip=params['num_skip'],
                      sort_utt=True,
                      reverse=True,
                      tool=params['tool'])

    params['num_classes'] = dataset.num_classes

    # Load model
    model = load(model_type=params['model_type'],
                 params=params,
                 backend=params['backend'])

    # Restore the saved parameters
    model.load_checkpoint(save_path=args.model_path, epoch=args.epoch)

    # GPU setting
    model.set_cuda(deterministic=False, benchmark=True)

    save_path = mkdir_join(args.model_path, 'ctc_probs')

    ######################################################################

    # Clean directory
    if save_path is not None and isdir(save_path):
        shutil.rmtree(save_path)
        mkdir(save_path)

    # Width of the spectrogram slice: the same `input_freq` the dataset was
    # built with, instead of a hard-coded 40.
    input_freq = params['input_freq']

    for batch, is_new_epoch in dataset:
        # Get CTC probs
        probs, x_lens, _ = model.posteriors(batch['xs'], batch['x_lens'],
                                            temperature=1)
        # NOTE: probs: '[B, T, num_classes]'

        # Visualize
        for b in range(len(batch['xs'])):
            plot_ctc_probs(
                probs[b, :x_lens[b], :],
                frame_num=x_lens[b],
                num_stack=dataset.num_stack,
                spectrogram=batch['xs'][b, :, :input_freq],
                save_path=join(save_path, batch['input_names'][b] + '.png'),
                figsize=(14, 7))

        if is_new_epoch:
            break
def main(config_path, model_save_path, gpu_indices):
    """Build a CTC model for Librispeech from a YAML config and train it.

    Args:
        config_path (string): path to the config file (.yml); its `param`
            section supplies every hyperparameter read below
        model_save_path (string): root directory under which the model
            directory is created
        gpu_indices (list): GPU indices; only its length is used here, the
            list itself is forwarded to `do_train`
    Raises:
        TypeError: if no number of classes could be determined for the
            given `label_type` / `train_data_size`
    """
    # Load a config file (.yml)
    with open(config_path, "r") as f:
        # safe_load: `yaml.load` without an explicit Loader is rejected by
        # PyYAML >= 6 and can build arbitrary objects on older versions.
        config = yaml.safe_load(f)

    params = config['param']

    # Except for a blank class
    if params['label_type'] == 'character':
        params['num_classes'] = 28
    elif params['label_type'] == 'character_capital_divide':
        params['num_classes'] = 77
    elif params['label_type'] == 'word':
        if params['train_data_size'] == 'train_clean100':
            params['num_classes'] = 7213
        elif params['train_data_size'] == 'train_clean360':
            params['num_classes'] = 16287
        elif params['train_data_size'] == 'train_other500':
            params['num_classes'] = 18669
        elif params['train_data_size'] == 'train_all':
            params['num_classes'] = 26642

    # An unknown label type or data size sets nothing above; fail here with
    # a clear message instead of a KeyError at model construction.  A value
    # supplied by the config itself is still accepted.
    if 'num_classes' not in params:
        raise TypeError(
            'Unsupported label_type / train_data_size: ' +
            str(params['label_type']) + ' / ' +
            str(params['train_data_size']))

    # Model setting
    model = CTC(encoder_type=params['encoder_type'],
                input_size=params['input_size'] * params['num_stack'],
                splice=params['splice'],
                num_units=params['num_units'],
                num_layers=params['num_layers'],
                num_classes=params['num_classes'],
                lstm_impl=params['lstm_impl'],
                use_peephole=params['use_peephole'],
                parameter_init=params['weight_init'],
                clip_grad=params['clip_grad'],
                clip_activation=params['clip_activation'],
                num_proj=params['num_proj'],
                weight_decay=params['weight_decay'])

    # Set process name (uses the model's base name, before the suffixes)
    setproctitle('libri_' + model.name + '_' +
                 params['train_data_size'] + '_' + params['label_type'])

    # The name encodes the hyperparameters and becomes the directory name
    model.name += '_' + str(params['num_units'])
    model.name += '_' + str(params['num_layers'])
    model.name += '_' + params['optimizer']
    model.name += '_lr' + str(params['learning_rate'])
    if params['num_proj'] != 0:
        model.name += '_proj' + str(params['num_proj'])
    if params['dropout_hidden'] != 1:
        model.name += '_drop' + str(params['dropout_hidden'])
    if params['num_stack'] != 1:
        model.name += '_stack' + str(params['num_stack'])
    if params['weight_decay'] != 0:
        model.name += '_wd' + str(params['weight_decay'])
    if params['bottleneck_dim'] != 0:
        model.name += '_bottle' + str(params['bottleneck_dim'])
    if len(gpu_indices) >= 2:
        model.name += '_gpu' + str(len(gpu_indices))

    # Set save path
    model.save_path = mkdir_join(model_save_path, 'ctc',
                                 params['label_type'],
                                 params['train_data_size'], model.name)

    # Reset model directory: never reuse a directory that holds a finished
    # run (complete.txt) or a started one (config.yml); append _1, _2, ...
    # until a free name is found.
    model_index = 0
    new_model_path = model.save_path
    while (isfile(join(new_model_path, 'complete.txt')) or
           isfile(join(new_model_path, 'config.yml'))):
        model_index += 1
        new_model_path = model.save_path + '_' + str(model_index)
    model.save_path = mkdir(new_model_path)

    # Save config file
    shutil.copyfile(config_path, join(model.save_path, 'config.yml'))

    # Everything printed from here on goes to train.log
    sys.stdout = open(join(model.save_path, 'train.log'), 'w')
    # TODO(hirofumi): change to logger
    do_train(model=model, params=params, gpu_indices=gpu_indices)
def main(config_path, model_save_path, gpu_indices):
    """Set up an attention-based seq2seq model for CSJ and train it.

    Args:
        config_path (string): path to the config file (.yml)
        model_save_path (string): root directory under which the model
            directory is created
        gpu_indices (list): indices of GPUs. Only `len(gpu_indices)` is read
            here (for the model name); the value itself is handed to
            `do_train`
    Raises:
        TypeError: if `label_type` is not one of the supported types
        ValueError: if `num_classes` can not be determined from
            `train_data_size` and is not given in the config
    """
    # Load a config file (.yml)
    # NOTE: `yaml.load` without an explicit Loader is deprecated and rejected
    # by recent PyYAML releases; `safe_load` only builds plain Python objects
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)
    params = config['param']

    # Except for a <SOS> and <EOS> class
    if params['label_type'] == 'kana':
        params['num_classes'] = 146
    elif params['label_type'] == 'kana_divide':
        params['num_classes'] = 147
    elif params['label_type'] == 'kanji':
        if params['train_data_size'] == 'train_subset':
            params['num_classes'] = 2981
        elif params['train_data_size'] == 'train_fullset':
            params['num_classes'] = 3385
    elif params['label_type'] == 'kanji_divide':
        if params['train_data_size'] == 'train_subset':
            params['num_classes'] = 2982
        elif params['train_data_size'] == 'train_fullset':
            params['num_classes'] = 3386
    else:
        raise TypeError(
            'Unsupported label_type: %s' % str(params['label_type']))

    # The kanji branches silently fall through for an unknown data size;
    # fail with a readable message instead of a bare KeyError further down
    if 'num_classes' not in params:
        raise ValueError(
            'Can not determine num_classes (label_type: %s, '
            'train_data_size: %s).' %
            (params['label_type'], params['train_data_size']))

    # Model setting
    # <SOS> and <EOS> take the two indices right after the regular classes
    model = AttentionSeq2Seq(
        input_size=params['input_size'] * params['num_stack'],
        encoder_type=params['encoder_type'],
        encoder_num_units=params['encoder_num_units'],
        encoder_num_layers=params['encoder_num_layers'],
        encoder_num_proj=params['encoder_num_proj'],
        attention_type=params['attention_type'],
        attention_dim=params['attention_dim'],
        decoder_type=params['decoder_type'],
        decoder_num_units=params['decoder_num_units'],
        decoder_num_layers=params['decoder_num_layers'],
        embedding_dim=params['embedding_dim'],
        num_classes=params['num_classes'],
        sos_index=params['num_classes'],
        eos_index=params['num_classes'] + 1,
        max_decode_length=params['max_decode_length'],
        lstm_impl='LSTMBlockCell',
        use_peephole=params['use_peephole'],
        parameter_init=params['weight_init'],
        clip_grad_norm=params['clip_grad_norm'],
        clip_activation_encoder=params['clip_activation_encoder'],
        clip_activation_decoder=params['clip_activation_decoder'],
        weight_decay=params['weight_decay'],
        time_major=True,
        sharpening_factor=params['sharpening_factor'],
        logits_temperature=params['logits_temperature'],
        sigmoid_smoothing=params['sigmoid_smoothing'])

    # Set process name (uses the model name before it is overwritten below)
    setproctitle('tf_csj_' + model.name + '_' +
                 params['train_data_size'] + '_' +
                 params['label_type'] + '_' + params['attention_type'])

    # Encode the hyperparameters in the model name
    model.name = 'en' + str(params['encoder_num_units'])
    model.name += '_' + str(params['encoder_num_layers'])
    model.name += '_att' + str(params['attention_dim'])
    model.name += '_de' + str(params['decoder_num_units'])
    model.name += '_' + str(params['decoder_num_layers'])
    model.name += '_' + params['optimizer']
    model.name += '_lr' + str(params['learning_rate'])
    model.name += '_' + params['attention_type']
    if params['dropout_encoder'] != 0:
        model.name += '_dropen' + str(params['dropout_encoder'])
    if params['dropout_decoder'] != 0:
        model.name += '_dropde' + str(params['dropout_decoder'])
    if params['dropout_embedding'] != 0:
        model.name += '_dropem' + str(params['dropout_embedding'])
    if params['num_stack'] != 1:
        model.name += '_stack' + str(params['num_stack'])
    if params['weight_decay'] != 0:
        # The separator was missing here ('wd'), which glued this suffix to
        # the previous one; every other suffix starts with '_'
        model.name += '_wd' + str(params['weight_decay'])
    if params['sharpening_factor'] != 1:
        model.name += '_sharp' + str(params['sharpening_factor'])
    if params['logits_temperature'] != 1:
        model.name += '_temp' + str(params['logits_temperature'])
    if bool(params['sigmoid_smoothing']):
        model.name += '_smoothing'
    if len(gpu_indices) >= 2:
        model.name += '_gpu' + str(len(gpu_indices))

    # Set save path
    model.save_path = mkdir_join(
        model_save_path, 'attention', params['label_type'],
        params['train_data_size'], model.name)

    # Reset model directory
    # A directory is taken if it holds 'complete.txt' (training finished) or
    # 'config.yml' (training started but not finished); in both cases try
    # the next '_<index>' suffix
    model_index = 0
    new_model_path = model.save_path
    while (isfile(join(new_model_path, 'complete.txt')) or
           isfile(join(new_model_path, 'config.yml'))):
        model_index += 1
        new_model_path = model.save_path + '_' + str(model_index)
    model.save_path = mkdir(new_model_path)

    # Save config file
    shutil.copyfile(config_path, join(model.save_path, 'config.yml'))

    # Redirect stdout to the training log for the duration of training only,
    # then restore the previous stream and close the log file
    # TODO(hirofumi): change to logger
    prev_stdout = sys.stdout
    log_file = open(join(model.save_path, 'train.log'), 'w')
    sys.stdout = log_file
    try:
        do_train(model=model, params=params, gpu_indices=gpu_indices)
    finally:
        sys.stdout = prev_stdout
        log_file.close()
def plot(session, decode_op, attention_weights_op, model, dataset,
         label_type, is_test=False, save_path=None, show=False):
    """Visualize attention weights of Attention-based model.

    One heatmap (output labels x input frames) is drawn per sample.

    Args:
        session: session of training model
        decode_op: operation for decoding
        attention_weights_op: operation for computing attention weights
        model: model to evaluate
        dataset: An instance of a `Dataset` class
        label_type (string): phone39 or phone48 or phone61 or character or
            character_capital_divide
        is_test (bool, optional): not used in this function
        save_path (string, optional): path to save attention weights plotting
        show (bool, optional): if True, show each figure
    Raises:
        NotImplementedError: if `label_type` is not a phone-level label
    """
    # Clean directory, and make sure it exists before figures are saved
    if save_path is not None:
        if isdir(save_path):
            shutil.rmtree(save_path)
        mkdir(save_path)

    if label_type == 'character':
        map_fn = Idx2char(
            map_file_path='../metrics/mapping_files/character.txt')
    elif label_type == 'character_capital_divide':
        map_fn = Idx2char(
            map_file_path='../metrics/mapping_files/character_capital_divide.txt',
            capital_divide=True)
    else:
        map_fn = Idx2phone(
            map_file_path='../metrics/mapping_files/' + label_type + '.txt')

    for data, is_new_epoch in dataset:
        # Create feed dictionary for next mini batch
        # Only the first entry of each per-device list is fed, with dropout
        # disabled (keep_prob = 1.0)
        inputs, _, inputs_seq_len, _, input_names = data
        feed_dict = {
            model.inputs_pl_list[0]: inputs[0],
            model.inputs_seq_len_pl_list[0]: inputs_seq_len[0],
            model.keep_prob_encoder_pl_list[0]: 1.0,
            model.keep_prob_decoder_pl_list[0]: 1.0,
            model.keep_prob_embedding_pl_list[0]: 1.0
        }

        # Visualize
        batch_size, _ = inputs.shape[:2]
        attention_weights, labels_pred = session.run(
            [attention_weights_op, decode_op], feed_dict=feed_dict)

        for i_batch in range(batch_size):
            # Convert from index to label
            str_pred = map_fn(labels_pred[i_batch])
            if 'phone' in label_type:
                label_list = str_pred.split(' ')
            else:
                raise NotImplementedError

            fig = plt.figure(figsize=(10, 4))
            sns.heatmap(attention_weights[i_batch],
                        cmap='Blues',
                        xticklabels=False,
                        yticklabels=label_list)

            plt.xlabel('Input frames', fontsize=12)
            plt.ylabel('Output labels (top to bottom)', fontsize=12)

            # Save as a png file
            # Saving happens before `plt.show()`: showing first can leave an
            # empty current figure and produce a blank image
            # NOTE(review): the file name does not depend on `i_batch`, so
            # samples of one mini batch overwrite each other -- confirm the
            # layout of `input_names` against the dataset class
            if save_path is not None:
                plt.savefig(join(save_path, input_names[0] + '.png'),
                            dpi=500)

            if show:
                plt.show()

            # Release the figure; otherwise one figure per sample stays alive
            plt.close(fig)

        if is_new_epoch:
            break
def main(config_path, gpu_indices,
         model_save_path='/n/sd8/inaguma/result/timit/ctc/'):
    """Set up a (multi-GPU) CTC model for TIMIT from a config file and train it.

    Args:
        config_path (string): path to the config file (.yml)
        gpu_indices (list): indices of GPUs. Only `len(gpu_indices)` is read
            here (for the model name); the value itself is handed to
            `do_train`
        model_save_path (string, optional): root directory under which the
            model directory is created
    Raises:
        ValueError: if `label_type` is not supported, or if the model
            directory already holds a completed training run
    """
    # Load a config file (.yml)
    # NOTE: `yaml.load` without an explicit Loader is deprecated and rejected
    # by recent PyYAML releases; `safe_load` only builds plain Python objects
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)
    corpus = config['corpus']
    feature = config['feature']
    param = config['param']

    if corpus['label_type'] == 'phone61':
        output_size = 61
    elif corpus['label_type'] == 'phone48':
        output_size = 48
    elif corpus['label_type'] == 'phone39':
        output_size = 39
    elif corpus['label_type'] == 'character':
        output_size = 30
    else:
        # Without this branch `output_size` would be unbound below
        raise ValueError(
            'Unsupported label_type: %s' % str(corpus['label_type']))

    # Model setting
    CTCModel = load(model_type=config['model_name'])
    network = CTCModel(batch_size=param['batch_size'],
                       input_size=feature['input_size'] * feature['num_stack'],
                       num_unit=param['num_unit'],
                       num_layer=param['num_layer'],
                       output_size=output_size,
                       parameter_init=param['weight_init'],
                       clip_grad=param['clip_grad'],
                       clip_activation=param['clip_activation'],
                       dropout_ratio_input=param['dropout_input'],
                       dropout_ratio_hidden=param['dropout_hidden'],
                       num_proj=param['num_proj'],
                       weight_decay=param['weight_decay'])

    # Encode the hyperparameters in the model name
    network.model_name = config['model_name'].upper()
    network.model_name += '_' + str(param['num_unit'])
    network.model_name += '_' + str(param['num_layer'])
    network.model_name += '_' + param['optimizer']
    network.model_name += '_lr' + str(param['learning_rate'])
    if param['num_proj'] != 0:
        network.model_name += '_proj' + str(param['num_proj'])
    if feature['num_stack'] != 1:
        network.model_name += '_stack' + str(feature['num_stack'])
    if param['weight_decay'] != 0:
        network.model_name += '_weightdecay' + str(param['weight_decay'])
    network.model_name += '_' + str(len(gpu_indices)) + 'gpu'

    # Set save path
    network.model_dir = mkdir(model_save_path)
    network.model_dir = mkdir_join(network.model_dir, corpus['label_type'])
    network.model_dir = mkdir_join(network.model_dir, network.model_name)

    # Reset model directory
    # An unfinished run is wiped and restarted; a finished one (marked by
    # 'complete.txt') is never overwritten
    if not isfile(join(network.model_dir, 'complete.txt')):
        tf.gfile.DeleteRecursively(network.model_dir)
        tf.gfile.MakeDirs(network.model_dir)
    else:
        raise ValueError('File exists.')

    # Set process name
    setproctitle('multigpu_ctc_timit_' + corpus['label_type'])

    # Save config file
    shutil.copyfile(config_path, join(network.model_dir, 'config.yml'))

    # Redirect stdout to the training log while training; the previous
    # stream is restored and the log closed even if training raises
    prev_stdout = sys.stdout
    log_file = open(join(network.model_dir, 'train.log'), 'w')
    sys.stdout = log_file
    try:
        print(network.model_name)
        do_train(network=network,
                 optimizer=param['optimizer'],
                 learning_rate=param['learning_rate'],
                 batch_size=param['batch_size'],
                 epoch_num=param['num_epoch'],
                 label_type=corpus['label_type'],
                 num_stack=feature['num_stack'],
                 num_skip=feature['num_skip'],
                 gpu_indices=gpu_indices)
    finally:
        sys.stdout = prev_stdout
        log_file.close()
def main():
    """Plot attention weights of a hierarchical (word + sub-label) model.

    Restores the model found under `args.model_path`, decodes the 'eval1'
    set and writes, per utterance, one figure with the word-to-acoustic and
    sub-label-to-acoustic attention and one with the word-to-sub-label
    attention into `<model_path>/att_weights/<speaker>/`.
    """
    args = parser.parse_args()

    # Load a config file (.yml)
    params = load_config(join(args.model_path, 'config.yml'), is_eval=True)

    # Load dataset
    dataset = Dataset(
        data_save_path=args.data_save_path,
        backend=params['backend'],
        input_freq=params['input_freq'],
        use_delta=params['use_delta'],
        use_double_delta=params['use_double_delta'],
        data_type='eval1',
        data_size=params['data_size'],
        label_type=params['label_type'],
        label_type_sub=params['label_type_sub'],
        batch_size=args.eval_batch_size,
        splice=params['splice'],
        num_stack=params['num_stack'],
        num_skip=params['num_skip'],
        sort_utt=False,
        reverse=False,
        tool=params['tool'])

    # The class counts come from the dataset, not from the config
    params['num_classes'] = dataset.num_classes
    params['num_classes_sub'] = dataset.num_classes_sub

    # Load model and restore the saved parameters
    model = load(model_type=params['model_type'],
                 params=params,
                 backend=params['backend'])
    model.load_checkpoint(save_path=args.model_path, epoch=args.epoch)

    # GPU setting
    model.set_cuda(deterministic=False, benchmark=True)

    # If True, the reference sub labels are fed to the decoder
    # (teacher forcing)
    use_oracle = False

    plot_dir = mkdir_join(args.model_path, 'att_weights')

    # Clean directory
    if plot_dir is not None and isdir(plot_dir):
        shutil.rmtree(plot_dir)
        mkdir(plot_dir)

    for batch, is_new_epoch in dataset:
        batch_size = len(batch['xs'])

        ys_sub = None
        y_lens_sub = None
        if use_oracle:
            if dataset.is_test:
                # NOTE: transcript is separated by space('_')
                transcripts = [batch['ys_sub'][b][0]
                               for b in range(batch_size)]
                max_label_num = max(
                    [0] + [len(list(t)) for t in transcripts])

                # Pad with -1
                ys_sub = np.full((batch_size, max_label_num), -1,
                                 dtype=np.int32)
                y_lens_sub = np.zeros((batch_size, ), dtype=np.int32)
                for b, transcript in enumerate(transcripts):
                    indices = dataset.char2idx(transcript)
                    ys_sub[b, :len(indices)] = indices
                    y_lens_sub[b] = len(indices)
            else:
                ys_sub = batch['ys_sub']
                y_lens_sub = batch['y_lens_sub']

        best_hyps, aw, best_hyps_sub, aw_sub, aw_dec, _ = model.decode(
            batch['xs'], batch['x_lens'],
            beam_width=args.beam_width,
            max_decode_len=MAX_DECODE_LEN_WORD,
            min_decode_len=MIN_DECODE_LEN_WORD,
            beam_width_sub=args.beam_width_sub,
            max_decode_len_sub=MAX_DECODE_LEN_CHAR,
            min_decode_len_sub=MIN_DECODE_LEN_CHAR,
            length_penalty=args.length_penalty,
            coverage_penalty=args.coverage_penalty,
            teacher_forcing=use_oracle,
            ys_sub=ys_sub,
            y_lens_sub=y_lens_sub)

        for b in range(batch_size):
            word_list = dataset.idx2word(best_hyps[b], return_list=True)
            if dataset.label_type_sub == 'word':
                sub_list = dataset.idx2word(
                    best_hyps_sub[b], return_list=True)
            else:
                sub_list = dataset.idx2char(
                    best_hyps_sub[b], return_list=True)

            utt_name = batch['input_names'][b]
            speaker = utt_name.split('_')[0]
            num_words = len(word_list)
            num_subs = len(sub_list)
            num_frames = batch['x_lens'][b]

            # word to acoustic & character to acoustic
            plot_hierarchical_attention_weights(
                aw[b][:num_words, :num_frames],
                aw_sub[b][:num_subs, :num_frames],
                label_list=word_list,
                label_list_sub=sub_list,
                spectrogram=batch['xs'][b, :, :dataset.input_freq],
                save_path=mkdir_join(plot_dir, speaker, utt_name + '.png'),
                figsize=(40, 8))

            # word to character
            plot_nested_attention_weights(
                aw_dec[b][:num_words, :num_subs],
                label_list=word_list,
                label_list_sub=sub_list,
                save_path=mkdir_join(plot_dir, speaker,
                                     utt_name + '_word2char.png'),
                figsize=(40, 8))

        if is_new_epoch:
            break