Example #1

# Load pre-trained model if needed
if conf['training_parameter']['use_pretrained']:
    global_step = conf['training_parameter']['pretrained_step']
    listener = torch.load(conf['training_parameter']['pretrained_listener_path'])
    speller = torch.load(conf['training_parameter']['pretrained_speller_path'])
else:
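    # Otherwise start from scratch: build the Listener (encoder) and Speller (decoder) from the model configuration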
    global_step = 0
    listener = Listener(**conf['model_parameter'])
    speller = Speller(**conf['model_parameter'])

#las_module = LAS()
#las_module.set_submodules(listener, speller)

# Make the module run in parallel across multiple GPUs (currently disabled)
#las_module = torch.nn.DataParallel(las_module, device_ids=[0, 1, 2 ,3])

# Jointly optimize the listener (encoder) and speller (decoder) parameters with Adam
optimizer = torch.optim.Adam(
    [{'params': listener.parameters()},
     {'params': speller.parameters()}],
    lr=conf['training_parameter']['learning_rate'])

#optimizer = torch.optim.SGD([{'params':listener.parameters()}, {'params':speller.parameters()}],
#                             lr=conf['training_parameter']['learning_rate'])

print('Optimizer ADAM')
#print('Optimizer SGD')

best_ler = 1.0          # best (lowest) label error rate observed so far
record_gt_text = False  # whether the ground-truth text has been written to the log yet
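# TensorBoard summary writer; logs are written under training_log_dir + experiment_name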
log_writer = SummaryWriter(conf['meta_variable']['training_log_dir'] +
                           conf['meta_variable']['experiment_name'])

# Training
Example #2

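# Build the index-to-character mapping used to turn model output indices back into characters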
idx2char = {}
with open(conf['meta_variable']['data_path'] + '/idx2chap.csv', 'r') as f:
    for line in f:
        if 'idx' in line:  # skip the header row
            continue
        idx2char[int(line.split(',')[0])] = line[:-1].split(',')[1]  # strip trailing newline, then split

# Load pre-trained model if needed
if conf['training_parameter']['use_pretrained']:
    global_step = conf['training_parameter']['pretrained_step']
    listener = torch.load(conf['training_parameter']['pretrained_listener_path'])
    speller = torch.load(conf['training_parameter']['pretrained_speller_path'])
else:
    global_step = 0
    listener = Listener(**conf['model_parameter'])
    speller = Speller(**conf['model_parameter'])
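# A single Adam optimizer covers both the listener and speller parameters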
optimizer = torch.optim.Adam(
    [{'params': listener.parameters()}, {'params': speller.parameters()}],
    lr=conf['training_parameter']['learning_rate'])

best_ler = 1.0
record_gt_text = False
log_writer = SummaryWriter(conf['meta_variable']['training_log_dir']+conf['meta_variable']['experiment_name'])

# Training
print('Training starts...', flush=True)
# Main training loop: run until global_step reaches total_steps
while global_step < total_steps:

    # Linearly decay the teacher forcing rate from tf_rate_upperbound to tf_rate_lowerbound over tf_decay_step steps
    tf_rate = tf_rate_upperbound - (tf_rate_upperbound - tf_rate_lowerbound) * min(float(global_step) / tf_decay_step, 1)


    # Training