def main():
    """Build a CTC model from a saved config and pass it to ``do_eval``.

    Command-line arguments are taken from the module-level ``parser``. The
    ``param`` section of ``<args.model_path>/config.yml`` supplies the model
    hyper-parameters; the evaluation options (epoch, beam width, batch size,
    temperature) come from the command line.
    """
    args = parser.parse_args()

    # Load config file.
    # yaml.load() without an explicit Loader is deprecated since PyYAML 5.1
    # and raises TypeError on PyYAML >= 6, so use safe_load instead.
    # NOTE(review): assumes config.yml holds only standard YAML types - confirm.
    with open(os.path.join(args.model_path, 'config.yml'), "r") as f:
        config = yaml.safe_load(f)
    params = config['param']

    # Except for a blank class
    if params['label_type'] == 'character':
        params['num_classes'] = 28

    # Model setting (the input size is multiplied by the stacking factor)
    model = CTC(
        encoder_type=params['encoder_type'],
        input_size=params['input_size'] * params['num_stack'],
        splice=params['splice'],
        num_units=params['num_units'],
        num_layers=params['num_layers'],
        num_classes=params['num_classes'],
        lstm_impl=params['lstm_impl'],
        use_peephole=params['use_peephole'],
        parameter_init=params['weight_init'],
        clip_grad_norm=params['clip_grad_norm'],
        clip_activation=params['clip_activation'],
        num_proj=params['num_proj'],
        weight_decay=params['weight_decay'])

    model.save_path = args.model_path
    do_eval(model=model,
            params=params,
            epoch=args.epoch,
            beam_width=args.beam_width,
            eval_batch_size=args.eval_batch_size,
            temperature=args.temperature)
def main():
    """Rebuild the CTC model described by a saved config and evaluate it.

    Reads ``config.yml`` from ``args.model_path`` (arguments parsed by the
    module-level ``parser``), constructs ``CTC`` from its ``param`` section
    and calls ``do_eval`` with the command-line evaluation options.
    """
    args = parser.parse_args()

    # Load config file.
    # safe_load is used because yaml.load() with no Loader argument is
    # deprecated (PyYAML >= 5.1) and fails with TypeError on PyYAML >= 6.
    # NOTE(review): assumes config.yml holds only standard YAML types - confirm.
    config_file = os.path.join(args.model_path, 'config.yml')
    with open(config_file, "r") as f:
        config = yaml.safe_load(f)
    params = config['param']

    # Except for a blank class
    if params['label_type'] == 'character':
        params['num_classes'] = 28

    # Model setting
    model = CTC(encoder_type=params['encoder_type'],
                input_size=params['input_size'] * params['num_stack'],
                splice=params['splice'],
                num_units=params['num_units'],
                num_layers=params['num_layers'],
                num_classes=params['num_classes'],
                lstm_impl=params['lstm_impl'],
                use_peephole=params['use_peephole'],
                parameter_init=params['weight_init'],
                clip_grad_norm=params['clip_grad_norm'],
                clip_activation=params['clip_activation'],
                num_proj=params['num_proj'],
                weight_decay=params['weight_decay'])

    model.save_path = args.model_path

    do_eval(model=model, params=params,
            epoch=args.epoch,
            beam_width=args.beam_width,
            eval_batch_size=args.eval_batch_size,
            temperature=args.temperature)
def main():
    """Build a CTC model from a saved config and pass it to ``do_eval``.

    The number of output classes is derived from ``label_type`` (and, for
    some label types, ``train_data_size``) in the ``param`` section of
    ``<args.model_path>/config.yml``.

    Raises:
        TypeError: if ``label_type`` is not one of the supported values
    """
    args = parser.parse_args()

    # Load config file.
    # yaml.load() without an explicit Loader is deprecated since PyYAML 5.1
    # and raises TypeError on PyYAML >= 6, so use safe_load instead.
    # NOTE(review): assumes config.yml holds only standard YAML types - confirm.
    with open(join(args.model_path, 'config.yml'), "r") as f:
        config = yaml.safe_load(f)
    params = config['param']

    # Except for a blank class
    classes_by_data_size = {
        'character_capital_divide': {
            'train100h': 72, 'train460h': 77, 'train960h': 77},
        'word_freq10': {
            'train100h': 7213, 'train460h': 18641, 'train960h': 26642},
    }
    label_type = params['label_type']
    if label_type == 'character':
        params['num_classes'] = 28
    elif label_type in classes_by_data_size:
        size_table = classes_by_data_size[label_type]
        # An unlisted train_data_size leaves params['num_classes'] as it
        # was loaded from the config.
        if params['train_data_size'] in size_table:
            params['num_classes'] = size_table[params['train_data_size']]
    else:
        raise TypeError('Unsupported label_type: %s' % label_type)

    # Model setting
    model = CTC(encoder_type=params['encoder_type'],
                input_size=params['input_size'],
                splice=params['splice'],
                num_stack=params['num_stack'],
                num_units=params['num_units'],
                num_layers=params['num_layers'],
                num_classes=params['num_classes'],
                lstm_impl=params['lstm_impl'],
                use_peephole=params['use_peephole'],
                parameter_init=params['weight_init'],
                clip_grad_norm=params['clip_grad_norm'],
                clip_activation=params['clip_activation'],
                num_proj=params['num_proj'],
                weight_decay=params['weight_decay'])

    model.save_path = args.model_path
    do_eval(model=model, params=params,
            epoch=args.epoch,
            eval_batch_size=args.eval_batch_size,
            beam_width=args.beam_width)
def main():
    """Build a CTC model from a saved config and pass it to ``do_decode``.

    The number of output classes is derived from ``label_type`` (and, for
    some label types, ``train_data_size``) in the ``param`` section of
    ``<args.model_path>/config.yml``.

    Raises:
        TypeError: if ``label_type`` is not one of the supported values
    """
    args = parser.parse_args()

    # Load config file.
    # yaml.load() without an explicit Loader is deprecated since PyYAML 5.1
    # and raises TypeError on PyYAML >= 6, so use safe_load instead.
    # NOTE(review): assumes config.yml holds only standard YAML types - confirm.
    with open(join(args.model_path, 'config.yml'), "r") as f:
        config = yaml.safe_load(f)
    params = config['param']

    # Except for a blank class
    classes_by_data_size = {
        'character_capital_divide': {
            'train100h': 72, 'train460h': 77, 'train960h': 77},
        'word_freq10': {
            'train100h': 7213, 'train460h': 18641, 'train960h': 26642},
    }
    label_type = params['label_type']
    if label_type == 'character':
        params['num_classes'] = 28
    elif label_type in classes_by_data_size:
        size_table = classes_by_data_size[label_type]
        # An unlisted train_data_size leaves params['num_classes'] as it
        # was loaded from the config.
        if params['train_data_size'] in size_table:
            params['num_classes'] = size_table[params['train_data_size']]
    else:
        raise TypeError('Unsupported label_type: %s' % label_type)

    # Model setting
    # (a comma was missing after the input_size argument, which made the
    # whole call a syntax error)
    model = CTC(encoder_type=params['encoder_type'],
                input_size=params['input_size'],
                splice=params['splice'],
                num_stack=params['num_stack'],
                num_units=params['num_units'],
                num_layers=params['num_layers'],
                num_classes=params['num_classes'],
                lstm_impl=params['lstm_impl'],
                use_peephole=params['use_peephole'],
                parameter_init=params['weight_init'],
                clip_grad_norm=params['clip_grad_norm'],
                clip_activation=params['clip_activation'],
                num_proj=params['num_proj'],
                weight_decay=params['weight_decay'])

    model.save_path = args.model_path
    do_decode(model=model, params=params,
              epoch=args.epoch,
              beam_width=args.beam_width,
              eval_batch_size=args.eval_batch_size)
def main():
    """Build a CTC model from a saved config and pass it to ``do_decode``.

    The number of output classes is derived from ``label_type`` (kana or
    kanji variants) and, for the kanji variants, ``train_data_size`` in the
    ``param`` section of ``<args.model_path>/config.yml``.

    Raises:
        TypeError: if ``label_type`` is not one of the supported values
    """
    args = parser.parse_args()

    # Load config file.
    # yaml.load() without an explicit Loader is deprecated since PyYAML 5.1
    # and raises TypeError on PyYAML >= 6, so use safe_load instead.
    # NOTE(review): assumes config.yml holds only standard YAML types - confirm.
    with open(join(args.model_path, 'config.yml'), "r") as f:
        config = yaml.safe_load(f)
    params = config['param']

    # Except for a blank label
    fixed_classes = {'kana': 146, 'kana_divide': 147}
    classes_by_data_size = {
        'kanji': {'train_subset': 2981, 'train_fullset': 3385},
        'kanji_divide': {'train_subset': 2982, 'train_fullset': 3386},
    }
    label_type = params['label_type']
    if label_type in fixed_classes:
        params['num_classes'] = fixed_classes[label_type]
    elif label_type in classes_by_data_size:
        size_table = classes_by_data_size[label_type]
        # An unlisted train_data_size leaves params['num_classes'] as it
        # was loaded from the config.
        if params['train_data_size'] in size_table:
            params['num_classes'] = size_table[params['train_data_size']]
    else:
        raise TypeError('Unsupported label_type: %s' % label_type)

    # Model setting
    model = CTC(encoder_type=params['encoder_type'],
                input_size=params['input_size'],
                splice=params['splice'],
                num_stack=params['num_stack'],
                num_units=params['num_units'],
                num_layers=params['num_layers'],
                num_classes=params['num_classes'],
                lstm_impl=params['lstm_impl'],
                use_peephole=params['use_peephole'],
                parameter_init=params['weight_init'],
                clip_grad_norm=params['clip_grad_norm'],
                clip_activation=params['clip_activation'],
                num_proj=params['num_proj'],
                weight_decay=params['weight_decay'])

    model.save_path = args.model_path
    do_decode(model=model, params=params,
              epoch=args.epoch,
              beam_width=args.beam_width,
              eval_batch_size=args.eval_batch_size)
def main():
    """Build a CTC model from a saved config and pass it to ``do_plot``.

    The number of output classes is looked up from ``label_type`` in the
    ``param`` section of ``<args.model_path>/config.yml``.

    Raises:
        ValueError: if ``label_type`` is not one of the supported values
    """
    args = parser.parse_args()

    # Load config file.
    # yaml.load() without an explicit Loader is deprecated since PyYAML 5.1
    # and raises TypeError on PyYAML >= 6, so use safe_load instead.
    # NOTE(review): assumes config.yml holds only standard YAML types - confirm.
    with open(join(args.model_path, 'config.yml'), "r") as f:
        config = yaml.safe_load(f)
    params = config['param']

    # Except for a blank label
    num_classes_by_label = {
        'phone61': 61,
        'phone48': 48,
        'phone39': 39,
        'character': 28,
        'character_capital_divide': 72,
    }
    label_type = params['label_type']
    if label_type not in num_classes_by_label:
        raise ValueError('Unsupported label_type: %s' % label_type)
    params['num_classes'] = num_classes_by_label[label_type]

    # Model setting
    model = CTC(encoder_type=params['encoder_type'],
                input_size=params['input_size'],
                splice=params['splice'],
                num_stack=params['num_stack'],
                num_units=params['num_units'],
                num_layers=params['num_layers'],
                num_classes=params['num_classes'],
                lstm_impl=params['lstm_impl'],
                use_peephole=params['use_peephole'],
                parameter_init=params['weight_init'],
                clip_grad_norm=params['clip_grad_norm'],
                clip_activation=params['clip_activation'],
                num_proj=params['num_proj'],
                weight_decay=params['weight_decay'])

    model.save_path = args.model_path
    do_plot(model=model, params=params,
            epoch=args.epoch,
            eval_batch_size=args.eval_batch_size)
def main():
    """Build a CTC model from a saved config and pass it to ``do_decode``.

    The number of output classes is looked up from ``ss_type`` in the
    ``param`` section of ``<args.model_path>/config.yml``.

    Raises:
        TypeError: if ``ss_type`` is not one of the supported values
    """
    args = parser.parse_args()

    # Load config file.
    # yaml.load() without an explicit Loader is deprecated since PyYAML 5.1
    # and raises TypeError on PyYAML >= 6, so use safe_load instead.
    # NOTE(review): assumes config.yml holds only standard YAML types - confirm.
    with open(join(args.model_path, 'config.yml'), "r") as f:
        config = yaml.safe_load(f)
    params = config['param']

    # Except for a blank label
    num_classes_by_ss_type = {
        'remove': 147,
        'insert_left': 151,
        'insert_right': 151,
        'insert_both': 155,
    }
    ss_type = params['ss_type']
    if ss_type not in num_classes_by_ss_type:
        raise TypeError('Unsupported ss_type: %s' % ss_type)
    params['num_classes'] = num_classes_by_ss_type[ss_type]

    # Model setting
    model = CTC(encoder_type=params['encoder_type'],
                input_size=params['input_size'],
                splice=params['splice'],
                num_stack=params['num_stack'],
                num_units=params['num_units'],
                num_layers=params['num_layers'],
                num_classes=params['num_classes'],
                lstm_impl=params['lstm_impl'],
                use_peephole=params['use_peephole'],
                parameter_init=params['weight_init'],
                clip_grad_norm=params['clip_grad_norm'],
                clip_activation=params['clip_activation'],
                num_proj=params['num_proj'],
                weight_decay=params['weight_decay'])

    model.save_path = args.model_path
    do_decode(model=model, params=params,
              epoch=args.epoch,
              beam_width=args.beam_width,
              eval_batch_size=args.eval_batch_size)
def main(config_path, model_save_path):
    """Build a CTC model from a YAML config, prepare its directory and train.

    Steps: load the config, derive the input size and class count, build the
    ``CTC`` model, compose a descriptive model name, pick a fresh save
    directory, copy the config into it, redirect stdout to ``train.log``
    there, and call ``do_train``.

    Args:
        config_path: path to the config file (.yml)
        model_save_path: root directory under which the model directory
            is created
    Raises:
        ValueError: if ``params['feature']`` is neither 'fbank' nor 'is13'
    """
    # Load a config file (.yml).
    # yaml.load() without an explicit Loader is deprecated since PyYAML 5.1
    # and raises TypeError on PyYAML >= 6, so use safe_load instead.
    # NOTE(review): assumes the config holds only standard YAML types - confirm.
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)
    params = config['param']

    # Fail early on an unknown feature type instead of hitting an unbound
    # ``input_size`` further down.
    input_size_by_feature = {'fbank': 123, 'is13': 141}
    if params['feature'] not in input_size_by_feature:
        raise ValueError('Unsupported feature: %s' % params['feature'])
    input_size = input_size_by_feature[params['feature']]

    # Except for a blank class
    if params['label_type'] in ['original', 'phone3']:
        params['num_classes'] = 3
    elif params['label_type'] == 'phone4':
        params['num_classes'] = 4
    elif params['label_type'] == 'phone43':
        params['num_classes'] = 43

    # Model setting
    model = CTC(encoder_type=params['encoder_type'],
                input_size=input_size * params['num_stack'],
                splice=params['splice'],
                num_units=params['num_units'],
                num_layers=params['num_layers'],
                num_classes=params['num_classes'],
                lstm_impl=params['lstm_impl'],
                use_peephole=params['use_peephole'],
                parameter_init=params['weight_init'],
                clip_grad_norm=params['clip_grad_norm'],
                clip_activation=params['clip_activation'],
                num_proj=params['num_proj'],
                weight_decay=params['weight_decay'])

    # Set process name (uses the model name before the suffixes are added)
    setproctitle('tf_svc_' + model.name + '_' + params['label_type'])

    # Encode the main hyper-parameters into the model name
    model.name += '_' + str(params['num_units'])
    model.name += '_' + str(params['num_layers'])
    model.name += '_' + params['optimizer']
    model.name += '_lr' + str(params['learning_rate'])
    if params['num_proj'] != 0:
        model.name += '_proj' + str(params['num_proj'])
    if params['dropout'] != 0:
        model.name += '_drop' + str(params['dropout'])
    if params['num_stack'] != 1:
        model.name += '_stack' + str(params['num_stack'])
    if params['weight_decay'] != 0:
        model.name += '_wd' + str(params['weight_decay'])

    # Set save path
    model.save_path = mkdir_join(
        model_save_path, 'ctc', params['label_type'], model.name)

    # Reset model directory: a directory that already contains complete.txt
    # (training finished) or config.yml (training started) is taken, so
    # append an increasing index until a free one is found.
    model_index = 0
    new_model_path = model.save_path
    while (isfile(join(new_model_path, 'complete.txt')) or
           isfile(join(new_model_path, 'config.yml'))):
        model_index += 1
        new_model_path = model.save_path + '_' + str(model_index)
    model.save_path = mkdir(new_model_path)

    # Save config file
    shutil.copyfile(config_path, join(model.save_path, 'config.yml'))

    # From here on everything printed goes to train.log in the model directory
    sys.stdout = open(join(model.save_path, 'train.log'), 'w')
    # TODO(hirofumi): change to logger

    do_train(model=model, params=params)
def check(self, decoder_type):
    """Restore a checkpoint from './' and decode one generated batch.

    Prints the CER together with the reference and hypothesis strings of
    the first utterance in the batch.

    Args:
        decoder_type: 'tf_greedy', 'tf_beam_search', 'np_greedy' or
            'np_beam_search' (the values this method branches on)
    Raises:
        ValueError: if no checkpoint state is found in './'
    """
    banner = '=================================================='
    print(banner)
    print(' decoder_type: %s' % decoder_type)
    print(banner)

    tf.reset_default_graph()
    with tf.Graph().as_default():
        # Load batch data
        batch_size = 2
        num_stack = 2
        inputs, labels, inputs_seq_len = generate_data(
            label_type='character',
            model='ctc',
            batch_size=batch_size,
            num_stack=num_stack,
            splice=1)
        max_time = inputs.shape[1]

        # Define model graph
        model = CTC(encoder_type='blstm',
                    input_size=inputs[0].shape[-1],
                    splice=1,
                    num_stack=num_stack,
                    num_units=256,
                    num_layers=2,
                    num_classes=27,
                    lstm_impl='LSTMBlockCell',
                    parameter_init=0.1,
                    clip_grad_norm=5.0,
                    clip_activation=50,
                    num_proj=256,
                    weight_decay=1e-6)

        # Define placeholders (only the first entry of each list is used)
        model.create_placeholders()
        inputs_pl = model.inputs_pl_list[0]
        labels_pl = model.labels_pl_list[0]
        seq_len_pl = model.inputs_seq_len_pl_list[0]
        keep_prob_pl = model.keep_prob_pl_list[0]

        # Add to the graph each operation
        _, logits = model.compute_loss(
            inputs_pl, labels_pl, seq_len_pl, keep_prob_pl)
        if 'beam_search' in decoder_type:
            beam_width = 20
        else:
            beam_width = 1
        decode_op = model.decoder(logits, seq_len_pl, beam_width=beam_width)
        ler_op = model.compute_ler(decode_op, labels_pl)
        posteriors_op = model.posteriors(logits, blank_prior=1)

        # Numpy decoders are only built for the 'np_*' decoder types.
        # NOTE(review): blank_index differs between the two decoders
        # (num_classes vs. num_classes - 1) - confirm this is intended.
        if decoder_type == 'np_greedy':
            decoder = GreedyDecoder(blank_index=model.num_classes)
        elif decoder_type == 'np_beam_search':
            decoder = BeamSearchDecoder(space_index=26,
                                        blank_index=model.num_classes - 1)

        # Make feed dict (keep probability 1.0)
        feed_dict = {
            inputs_pl: inputs,
            labels_pl: list2sparsetensor(labels, padded_value=-1),
            seq_len_pl: inputs_seq_len,
            keep_prob_pl: 1.0
        }

        # Saver used to restore the checkpoint
        saver = tf.train.Saver()

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state('./')

            # A checkpoint is required
            if not ckpt:
                raise ValueError('There are not any checkpoints.')
            model_path = ckpt.model_checkpoint_path
            saver.restore(sess, model_path)
            print("Model restored: " + model_path)

            if decoder_type in ['tf_greedy', 'tf_beam_search']:
                # Decode with the TensorFlow decoder op
                labels_pred_st = sess.run(decode_op, feed_dict=feed_dict)
                labels_pred = sparsetensor2list(
                    labels_pred_st, batch_size=batch_size)

                # Compute accuracy
                cer = sess.run(ler_op, feed_dict=feed_dict)
            else:
                # Compute CTC posteriors
                probs = sess.run(posteriors_op, feed_dict=feed_dict)
                probs = probs.reshape(-1, max_time, model.num_classes)

                # Decode with the numpy decoder
                if decoder_type == 'np_greedy':
                    labels_pred = decoder(probs=probs,
                                          seq_len=inputs_seq_len)
                elif decoder_type == 'np_beam_search':
                    labels_pred, _ = decoder(probs=probs,
                                             seq_len=inputs_seq_len,
                                             beam_width=beam_width)

                # Compute accuracy on the first utterance
                cer = compute_cer(str_pred=idx2alpha(labels_pred[0]),
                                  str_true=idx2alpha(labels[0]),
                                  normalize=True)

            # Visualize
            print('CER: %.3f %%' % (cer * 100))
            print('Ref: %s' % idx2alpha(labels[0]))
            print('Hyp: %s' % idx2alpha(labels_pred[0]))
def main(config_path, model_save_path, gpu_indices):
    """Build a CTC model from a YAML config, prepare its directory and train.

    Steps: load the config, set the class count, build the ``CTC`` model,
    compose a descriptive model name, pick a fresh save directory, copy the
    config into it, and call ``do_train``.

    Args:
        config_path: path to the config file (.yml)
        model_save_path: root directory under which the model directory
            is created
        gpu_indices: sequence of GPU indices; its length is appended to the
            model name when two or more are given, and it is forwarded to
            ``do_train``
    Raises:
        TypeError: if ``label_type`` is not 'character'
    """
    # Load a config file (.yml).
    # yaml.load() without an explicit Loader is deprecated since PyYAML 5.1
    # and raises TypeError on PyYAML >= 6, so use safe_load instead.
    # NOTE(review): assumes the config holds only standard YAML types - confirm.
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)
    params = config['param']

    # Except for a blank class
    # TODO load vocab.txt num
    if params['label_type'] == 'character':
        params['num_classes'] = 4714
    else:
        raise TypeError('Unsupported label_type: %s' % params['label_type'])

    # Model setting
    model = CTC(encoder_type=params['encoder_type'],
                input_size=params['input_size'],
                splice=params['splice'],
                num_stack=params['num_stack'],
                num_units=params['num_units'],
                num_layers=params['num_layers'],
                num_classes=params['num_classes'],
                lstm_impl=params['lstm_impl'],
                use_peephole=params['use_peephole'],
                parameter_init=params['weight_init'],
                clip_grad_norm=params['clip_grad_norm'],
                clip_activation=params['clip_activation'],
                num_proj=params['num_proj'],
                weight_decay=params['weight_decay'])

    # Set process name (uses the model name before the suffixes are added)
    setproctitle('tf_libri_' + model.name + '_' +
                 params['train_data_size'] + '_' + params['label_type'])

    # Encode the main hyper-parameters into the model name
    model.name += '_' + str(params['num_units'])
    model.name += '_' + str(params['num_layers'])
    model.name += '_' + params['optimizer']
    model.name += '_lr' + str(params['learning_rate'])
    if params['num_proj'] != 0:
        model.name += '_proj' + str(params['num_proj'])
    if params['dropout'] != 0:
        model.name += '_drop' + str(params['dropout'])
    if params['num_stack'] != 1:
        model.name += '_stack' + str(params['num_stack'])
    if params['weight_decay'] != 0:
        model.name += '_wd' + str(params['weight_decay'])
    if params['bottleneck_dim'] != 0:
        model.name += '_bottle' + str(params['bottleneck_dim'])
    if len(gpu_indices) >= 2:
        model.name += '_gpu' + str(len(gpu_indices))

    # Set save path
    model.save_path = mkdir_join(model_save_path, 'ctc',
                                 params['label_type'],
                                 params['train_data_size'],
                                 model.name)

    # Reset model directory: a directory that already contains complete.txt
    # (training finished) or config.yml (training started) is taken, so
    # append an increasing index until a free one is found.
    model_index = 0
    new_model_path = model.save_path
    while (isfile(join(new_model_path, 'complete.txt')) or
           isfile(join(new_model_path, 'config.yml'))):
        model_index += 1
        new_model_path = model.save_path + '_' + str(model_index)
    model.save_path = mkdir(new_model_path)

    # Save config file
    shutil.copyfile(config_path, join(model.save_path, 'config.yml'))

    # NOTE: stdout is not redirected to train.log in this script.
    # TODO(hirofumi): change to logger

    do_train(model=model, params=params, gpu_indices=gpu_indices)
def check(self, decoder_type):
    """Decode one generated batch with a model restored from './'.

    The CER plus the reference / hypothesis text of the first utterance
    are printed to stdout.

    Args:
        decoder_type: 'tf_greedy', 'tf_beam_search', 'np_greedy' or
            'np_beam_search' (the values this method branches on)
    Raises:
        ValueError: if no checkpoint state is found in './'
    """
    separator = '=================================================='
    print(separator)
    print(' decoder_type: %s' % decoder_type)
    print(separator)

    tf.reset_default_graph()
    with tf.Graph().as_default():
        # Load batch data
        batch_size, num_stack = 2, 2
        inputs, labels, inputs_seq_len = generate_data(
            label_type='character', model='ctc',
            batch_size=batch_size, num_stack=num_stack, splice=1)
        max_time = inputs.shape[1]

        # Define model graph
        model = CTC(
            encoder_type='blstm',
            input_size=inputs[0].shape[-1],
            splice=1,
            num_stack=num_stack,
            num_units=256,
            num_layers=2,
            num_classes=27,
            lstm_impl='LSTMBlockCell',
            parameter_init=0.1,
            clip_grad_norm=5.0,
            clip_activation=50,
            num_proj=256,
            weight_decay=1e-6)

        # Define placeholders; this check feeds the first of each list
        model.create_placeholders()
        pl_inputs = model.inputs_pl_list[0]
        pl_labels = model.labels_pl_list[0]
        pl_seq_len = model.inputs_seq_len_pl_list[0]
        pl_keep_prob = model.keep_prob_pl_list[0]

        # Add to the graph each operation
        _, logits = model.compute_loss(
            pl_inputs, pl_labels, pl_seq_len, pl_keep_prob)
        beam_width = 20 if 'beam_search' in decoder_type else 1
        decode_op = model.decoder(logits, pl_seq_len, beam_width=beam_width)
        ler_op = model.compute_ler(decode_op, pl_labels)
        posteriors_op = model.posteriors(logits, blank_prior=1)

        # Numpy-side decoders exist only for the 'np_*' decoder types.
        # NOTE(review): the two decoders receive different blank_index
        # values (num_classes vs. num_classes - 1) - confirm.
        if decoder_type == 'np_greedy':
            decoder = GreedyDecoder(blank_index=model.num_classes)
        elif decoder_type == 'np_beam_search':
            decoder = BeamSearchDecoder(
                space_index=26, blank_index=model.num_classes - 1)

        # Make feed dict (keep probability 1.0)
        sparse_labels = list2sparsetensor(labels, padded_value=-1)
        feed_dict = {
            pl_inputs: inputs,
            pl_labels: sparse_labels,
            pl_seq_len: inputs_seq_len,
            pl_keep_prob: 1.0
        }

        # Saver used to restore the checkpoint
        saver = tf.train.Saver()

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state('./')

            # Without a checkpoint there is nothing to decode with
            if not ckpt:
                raise ValueError('There are not any checkpoints.')
            model_path = ckpt.model_checkpoint_path
            saver.restore(sess, model_path)
            print("Model restored: " + model_path)

            if decoder_type in ['tf_greedy', 'tf_beam_search']:
                # Decode inside the TensorFlow graph
                labels_pred_st = sess.run(decode_op, feed_dict=feed_dict)
                labels_pred = sparsetensor2list(labels_pred_st,
                                                batch_size=batch_size)

                # Compute accuracy
                cer = sess.run(ler_op, feed_dict=feed_dict)
            else:
                # Compute CTC posteriors
                probs = sess.run(posteriors_op, feed_dict=feed_dict)
                probs = probs.reshape(-1, max_time, model.num_classes)

                # Decode from the posteriors
                if decoder_type == 'np_greedy':
                    labels_pred = decoder(probs=probs,
                                          seq_len=inputs_seq_len)
                elif decoder_type == 'np_beam_search':
                    labels_pred, _ = decoder(probs=probs,
                                             seq_len=inputs_seq_len,
                                             beam_width=beam_width)

                # Compute accuracy on the first utterance
                cer = compute_cer(str_pred=idx2alpha(labels_pred[0]),
                                  str_true=idx2alpha(labels[0]),
                                  normalize=True)

            # Visualize
            print('CER: %.3f %%' % (cer * 100))
            print('Ref: %s' % idx2alpha(labels[0]))
            print('Hyp: %s' % idx2alpha(labels_pred[0]))
def main(config_path, model_save_path, gpu_indices):
    """Build a CTC model from a YAML config, prepare its directory and train.

    Steps: load the config, derive the class count from ``label_type`` (and
    ``train_data_size`` for the kanji variants), build the ``CTC`` model,
    compose a descriptive model name, pick a fresh save directory, copy the
    config into it, redirect stdout to ``train.log`` there, and call
    ``do_train``.

    Args:
        config_path: path to the config file (.yml)
        model_save_path: root directory under which the model directory
            is created
        gpu_indices: sequence of GPU indices; its length is appended to the
            model name when two or more are given, and it is forwarded to
            ``do_train``
    Raises:
        TypeError: if ``label_type`` is not one of the supported values
    """
    # Load a config file (.yml).
    # yaml.load() without an explicit Loader is deprecated since PyYAML 5.1
    # and raises TypeError on PyYAML >= 6, so use safe_load instead.
    # NOTE(review): assumes the config holds only standard YAML types - confirm.
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)
    params = config['param']

    # Except for a blank label
    fixed_classes = {'kana': 146, 'kana_divide': 147}
    classes_by_data_size = {
        'kanji': {'train_subset': 2981, 'train_fullset': 3385},
        'kanji_divide': {'train_subset': 2982, 'train_fullset': 3386},
    }
    label_type = params['label_type']
    if label_type in fixed_classes:
        params['num_classes'] = fixed_classes[label_type]
    elif label_type in classes_by_data_size:
        size_table = classes_by_data_size[label_type]
        # An unlisted train_data_size leaves params['num_classes'] as it
        # was loaded from the config.
        if params['train_data_size'] in size_table:
            params['num_classes'] = size_table[params['train_data_size']]
    else:
        raise TypeError('Unsupported label_type: %s' % label_type)

    # Model setting
    model = CTC(encoder_type=params['encoder_type'],
                input_size=params['input_size'],
                splice=params['splice'],
                num_stack=params['num_stack'],
                num_units=params['num_units'],
                num_layers=params['num_layers'],
                num_classes=params['num_classes'],
                lstm_impl=params['lstm_impl'],
                use_peephole=params['use_peephole'],
                parameter_init=params['weight_init'],
                clip_grad_norm=params['clip_grad_norm'],
                clip_activation=params['clip_activation'],
                num_proj=params['num_proj'],
                weight_decay=params['weight_decay'])

    # Set process name (uses the model name before the suffixes are added)
    setproctitle(
        'tf_csj_' + model.name + '_' + params['train_data_size'] + '_' +
        params['label_type'])

    # Encode the main hyper-parameters into the model name
    model.name += '_' + str(params['num_units'])
    model.name += '_' + str(params['num_layers'])
    model.name += '_' + params['optimizer']
    model.name += '_lr' + str(params['learning_rate'])
    if params['num_proj'] != 0:
        model.name += '_proj' + str(params['num_proj'])
    if params['dropout'] != 0:
        model.name += '_drop' + str(params['dropout'])
    if params['num_stack'] != 1:
        model.name += '_stack' + str(params['num_stack'])
    if params['weight_decay'] != 0:
        model.name += '_wd' + str(params['weight_decay'])
    if params['bottleneck_dim'] != 0:
        model.name += '_bottle' + str(params['bottleneck_dim'])
    if len(gpu_indices) >= 2:
        model.name += '_gpu' + str(len(gpu_indices))

    # Set save path
    model.save_path = mkdir_join(
        model_save_path, 'ctc', params['label_type'],
        params['train_data_size'], model.name)

    # Reset model directory: a directory that already contains complete.txt
    # (training finished) or config.yml (training started) is taken, so
    # append an increasing index until a free one is found.
    model_index = 0
    new_model_path = model.save_path
    while (isfile(join(new_model_path, 'complete.txt')) or
           isfile(join(new_model_path, 'config.yml'))):
        model_index += 1
        new_model_path = model.save_path + '_' + str(model_index)
    model.save_path = mkdir(new_model_path)

    # Save config file
    shutil.copyfile(config_path, join(model.save_path, 'config.yml'))

    # From here on everything printed goes to train.log in the model directory
    sys.stdout = open(join(model.save_path, 'train.log'), 'w')
    # TODO(hirofumi): change to logger

    do_train(model=model, params=params, gpu_indices=gpu_indices)
def check(self, encoder_type, label_type='character', lstm_impl=None,
          time_major=True, save_params=False):
    """Train a CTC model on one generated batch until its LER drops below 0.1.

    Runs at most 1000 training steps on the same batch. Every 10 steps the
    label error rate is computed and printed with the reference and
    hypothesis of the first utterance, and the learning rate is updated
    through the ``Controller``. Training stops once the LER is below 0.1.

    Args:
        encoder_type: encoder name passed to ``CTC``; also selects the
            splice width (11 for the listed CNN-style encoders, else 1)
        label_type: 'character' uses 27 classes and ``idx2alpha``; any other
            value uses 61 classes and the phone map
        lstm_impl: LSTM implementation name passed to ``CTC``; the parameter
            count is skipped when it is 'CudnnLSTM'
        time_major: forwarded to ``CTC``
        save_params: if True, save a checkpoint to './model.ckpt' when the
            model converges
    """
    banner = '=================================================='
    print(banner)
    print(' encoder_type: %s' % encoder_type)
    print(' label_type: %s' % label_type)
    print(' lstm_impl: %s' % lstm_impl)
    print(' time_major: %s' % str(time_major))
    print(' save_params: %s' % str(save_params))
    print(banner)

    tf.reset_default_graph()
    with tf.Graph().as_default():
        # Load batch data
        batch_size = 2
        spliced_encoders = ['vgg_blstm', 'vgg_lstm', 'cnn_zhang',
                            'vgg_wang', 'resnet_wang', 'cldnn_wang']
        if encoder_type in spliced_encoders:
            splice = 11
        else:
            splice = 1
        num_stack = 2
        inputs, labels, inputs_seq_len = generate_data(
            label_type=label_type,
            model='ctc',
            batch_size=batch_size,
            num_stack=num_stack,
            splice=splice)
        # NOTE: input_size must be even number when using CudnnLSTM

        # Define model graph
        if label_type == 'character':
            num_classes = 27
        else:
            num_classes = 61
        model = CTC(encoder_type=encoder_type,
                    input_size=inputs[0].shape[-1] // splice // num_stack,
                    splice=splice,
                    num_stack=num_stack,
                    num_units=256,
                    num_layers=2,
                    num_classes=num_classes,
                    lstm_impl=lstm_impl,
                    parameter_init=0.1,
                    clip_grad_norm=5.0,
                    clip_activation=50,
                    num_proj=256,
                    weight_decay=1e-10,
                    bottleneck_dim=None,
                    time_major=time_major)

        # Define placeholders (only the first entry of each list is used)
        model.create_placeholders()
        learning_rate_pl = tf.placeholder(tf.float32, name='learning_rate')
        inputs_pl = model.inputs_pl_list[0]
        labels_pl = model.labels_pl_list[0]
        seq_len_pl = model.inputs_seq_len_pl_list[0]
        keep_prob_pl = model.keep_prob_pl_list[0]

        # Add to the graph each operation
        loss_op, logits = model.compute_loss(
            inputs_pl, labels_pl, seq_len_pl, keep_prob_pl)
        train_op = model.train(loss_op,
                               optimizer='nestrov',
                               learning_rate=learning_rate_pl)
        # NOTE: Adam does not run on CudnnLSTM
        decode_op = model.decoder(logits, seq_len_pl, beam_width=20)
        ler_op = model.compute_ler(decode_op, labels_pl)

        # Define learning rate controller
        learning_rate = 1e-4
        lr_controller = Controller(learning_rate_init=learning_rate,
                                   decay_start_epoch=50,
                                   decay_rate=0.9,
                                   decay_patient_epoch=10,
                                   lower_better=True)

        if save_params:
            # Create a saver for writing training checkpoints
            saver = tf.train.Saver(max_to_keep=None)

        # Add the variable initializer operation
        init_op = tf.global_variables_initializer()

        # Count total parameters (skipped for CudnnLSTM)
        if lstm_impl != 'CudnnLSTM':
            parameters_dict, total_parameters = count_total_parameters(
                tf.trainable_variables())
            for name in sorted(parameters_dict.keys()):
                print("%s %d" % (name, parameters_dict[name]))
            total_in_millions = "{:,}".format(total_parameters / 1000000)
            print("Total %d variables, %s M parameters" %
                  (len(parameters_dict.keys()), total_in_millions))

        # Make feed dict
        feed_dict = {
            inputs_pl: inputs,
            labels_pl: list2sparsetensor(labels, padded_value=-1),
            seq_len_pl: inputs_seq_len,
            keep_prob_pl: 1.0,
            learning_rate_pl: learning_rate
        }

        idx2phone = Idx2phone(map_file_path='./phone61.txt')

        # Index-to-string converter matching the label type
        idx2str = idx2alpha if label_type == 'character' else idx2phone

        with tf.Session() as sess:
            # Initialize parameters
            sess.run(init_op)

            # Train model
            max_steps = 1000
            start_time_step = time.time()
            for step in range(max_steps):
                # Compute loss
                _, loss_train = sess.run(
                    [train_op, loss_op], feed_dict=feed_dict)

                # Everything below only happens on every 10th step
                if (step + 1) % 10 != 0:
                    continue

                # Change to evaluation mode
                feed_dict[keep_prob_pl] = 1.0

                # Compute accuracy
                ler_train = sess.run(ler_op, feed_dict=feed_dict)

                duration_step = time.time() - start_time_step
                print('Step %d: loss = %.3f / ler = %.3f (%.3f sec) / lr = %.5f' %
                      (step + 1, loss_train, ler_train, duration_step,
                       learning_rate))
                start_time_step = time.time()

                # Decode
                labels_pred_st = sess.run(decode_op, feed_dict=feed_dict)

                # Visualize
                try:
                    labels_pred = sparsetensor2list(
                        labels_pred_st, batch_size=batch_size)
                    print('Ref: %s' % idx2str(labels[0]))
                    print('Hyp: %s' % idx2str(labels_pred[0]))
                except IndexError:
                    # NOTE: This is for no prediction
                    print('Ref: %s' % idx2str(labels[0]))
                    print('Hyp: %s' % '')

                if ler_train < 0.1:
                    print('Modle is Converged.')
                    if save_params:
                        # Save model (check point)
                        checkpoint_file = './model.ckpt'
                        save_path = saver.save(
                            sess, checkpoint_file, global_step=2)
                        print("Model saved in file: %s" % save_path)
                    break

                # Update learning rate
                learning_rate = lr_controller.decay_lr(
                    learning_rate=learning_rate,
                    epoch=step,
                    value=ler_train)
                feed_dict[learning_rate_pl] = learning_rate