def train(model, train_loader, dev_loader, optimizer, max_epoch, model_dir, encoder,
          gpu_mode, eval_frequency=500):
    """Train `model` on imSitu batches, evaluating on the dev set periodically.

    Checkpoints are written to `model_dir` whenever the dev loss reaches a new
    minimum.

    Args:
        model: network exposing ``forward(img, verb, roles) ->
            (verb_predict, role_predict)`` and
            ``calculate_loss(verb_predict, verb, role_predict, labels)``.
        train_loader: iterable yielding ``(img, verb, roles, labels)`` batches.
        dev_loader: same batch format, used by :func:`eval`.
        optimizer: torch optimizer over ``model.parameters()``.
        max_epoch: number of passes over ``train_loader``.
        model_dir: directory receiving ``baseline_devloss_<k>.h5`` checkpoints.
        encoder: dataset encoder; must provide ``get_role_encoding(verb_ids)``.
        gpu_mode: ``>= 0`` moves batches to CUDA, ``< 0`` keeps them on CPU.
        eval_frequency: run a dev evaluation every this many training steps.
    """
    model.train()
    train_loss = 0
    total_steps = 0
    print_freq = 50
    dev_loss_list = []
    top1 = imsitu_scorer(encoder, 1, 3)
    top5 = imsitu_scorer(encoder, 5, 3)

    for epoch in range(max_epoch):
        # Batch shapes per the original author's note: img B*3*H*W, verb B*504,
        # roles B*6*190, labels B*3*6*label_count -- TODO confirm against loader.
        for i, (img, verb, roles, labels) in enumerate(train_loader):
            total_steps += 1

            # torch.autograd.Variable is a no-op since PyTorch 0.4 (the file
            # already relies on 0.4+ APIs); moving tensors to the device is all
            # that is needed here.
            if gpu_mode >= 0:
                img = img.cuda()
                verb = verb.cuda()
                roles = roles.cuda()
                labels = labels.cuda()

            verb_predict, role_predict = model(img, verb, roles)
            loss = model.calculate_loss(verb_predict, verb, role_predict, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # .item() extracts a detached Python float; accumulating the tensor
            # itself (the original `loss.data`) keeps GPU memory pinned.
            train_loss += loss.item()

            # Select the role predictions belonging to the ground-truth verb.
            target = torch.max(verb, 1)[1]
            target_role_encoding = encoder.get_role_encoding(target)
            role_pred_for_target = torch.bmm(target_role_encoding, role_predict)

            top1.add_point(verb_predict, verb, role_pred_for_target, labels)
            top5.add_point(verb_predict, verb, role_pred_for_target, labels)

            if total_steps % print_freq == 0:
                top1_a = top1.get_average_results()
                top5_a = top5.get_average_results()
                # +1 so the denominator equals the number of losses accumulated
                # since the last eval reset (and can never be zero). The
                # original `(total_steps - 1) % eval_frequency` was off by one.
                steps_since_eval = ((total_steps - 1) % eval_frequency) + 1
                print("{},{},{}, {} , {}, loss = {:.2f}, avg loss = {:.2f}".format(
                    total_steps - 1, epoch, i,
                    utils.format_dict(top1_a, "{:.2f}", "1-"),
                    utils.format_dict(top5_a, "{:.2f}", "5-"),
                    loss.item(), train_loss / steps_since_eval))

            if total_steps % eval_frequency == 0:
                # Bind eval results to separate names: the original overwrote
                # the *training* scorers (top1/top5) with the eval scorers, so
                # training points were then accumulated into eval statistics.
                eval_top1, eval_top5, dev_loss = eval(model, dev_loader, encoder, gpu_mode)
                model.train()  # eval() switched the model to inference mode

                top1_avg = eval_top1.get_average_results()
                top5_avg = eval_top5.get_average_results()

                avg_score = (top1_avg["verb"] + top1_avg["value"] + top1_avg["value-all"]
                             + top5_avg["verb"] + top5_avg["value"] + top5_avg["value-all"]
                             + top5_avg["value*"] + top5_avg["value-all*"]) / 8

                print('Dev {} average :{:.2f} {} {}'.format(
                    total_steps - 1, avg_score * 100,
                    utils.format_dict(top1_avg, '{:.2f}', '1-'),
                    utils.format_dict(top5_avg, '{:.2f}', '5-')))

                dev_loss_list.append(dev_loss)
                # Checkpoint only on a new dev-loss minimum.
                if dev_loss <= min(dev_loss_list):
                    checkpoint_name = os.path.join(
                        model_dir, '{}_devloss_{}.h5'.format('baseline', len(dev_loss_list)))
                    utils.save_net(checkpoint_name, model)
                    print('New best model saved! {0}'.format(dev_loss))

                print('current train loss', train_loss)
                train_loss = 0
                # Fresh scorers so the printed training averages cover only the
                # window since this eval (mirrors the scheduler variant below).
                top1 = imsitu_scorer(encoder, 1, 3)
                top5 = imsitu_scorer(encoder, 5, 3)

        print('Epoch ', epoch, ' completed!')
def train(model, train_loader, dev_loader, optimizer, scheduler, max_epoch, model_dir,
          encoder, gpu_mode, eval_frequency=500):
    """Train the CNN-graph model on imSitu, with per-epoch LR scheduling.

    Checkpoints are written to `model_dir` whenever the combined dev score
    reaches a new maximum.

    Args:
        model: network exposing ``forward(img, verb, roles) -> (verb_predict,
            role_predict, norm, verb_marginal, best_role_ids)`` and
            ``calculate_loss(verb_predict, verb, norm, role_predict, labels)``.
        train_loader: iterable yielding ``(img, verb, roles, labels)`` batches.
        dev_loader: same batch format, used by :func:`eval`.
        optimizer: torch optimizer over ``model.parameters()``.
        scheduler: LR scheduler; ``step()`` is called once per epoch, after
            the optimizer steps of that epoch (required ordering in PyTorch 1.1+).
        max_epoch: number of passes over ``train_loader``.
        model_dir: directory receiving ``baseline_devloss_cnngraph_<k>.h5`` files.
        encoder: dataset encoder passed through to the scorers and :func:`eval`.
        gpu_mode: ``>= 0`` moves batches to CUDA, ``< 0`` keeps them on CPU.
        eval_frequency: run a dev evaluation every this many training steps.
    """
    model.train()
    train_loss = 0
    total_steps = 0
    print_freq = 50
    dev_score_list = []
    top1 = imsitu_scorer(encoder, 1, 3)
    top5 = imsitu_scorer(encoder, 5, 3)

    for epoch in range(max_epoch):
        # Batch shapes per the original author's note: img B*3*H*W, verb B*504,
        # roles B*6*190, labels B*3*6*label_count -- TODO confirm against loader.
        for i, (img, verb, roles, labels) in enumerate(train_loader):
            total_steps += 1

            # torch.autograd.Variable is a no-op since PyTorch 0.4 (this
            # function already uses .item()); device transfer is all we need.
            if gpu_mode >= 0:
                img = img.cuda()
                verb = verb.cuda()
                roles = roles.cuda()
                labels = labels.cuda()

            optimizer.zero_grad()
            verb_predict, role_predict, norm, verb_marginal, best_role_ids = model(
                img, verb, roles)
            loss = model.calculate_loss(verb_predict, verb, norm, role_predict, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

            top1.add_point(verb_marginal, verb, best_role_ids, labels, roles)
            top5.add_point(verb_marginal, verb, best_role_ids, labels, roles)

            if total_steps % print_freq == 0:
                top1_a = top1.get_average_results()
                top5_a = top5.get_average_results()
                # +1 so the denominator equals the number of losses accumulated
                # since the last eval reset (and can never be zero). The
                # original `(total_steps - 1) % eval_frequency` was off by one.
                steps_since_eval = ((total_steps - 1) % eval_frequency) + 1
                print("{},{},{}, {} , {}, loss = {:.2f}, avg loss = {:.2f}".format(
                    total_steps - 1, epoch, i,
                    utils.format_dict(top1_a, "{:.2f}", "1-"),
                    utils.format_dict(top5_a, "{:.2f}", "5-"),
                    loss.item(), train_loss / steps_since_eval))

            if total_steps % eval_frequency == 0:
                # Separate names keep the training scorers distinct from the
                # eval scorers until the explicit reset below.
                eval_top1, eval_top5, val_loss = eval(model, dev_loader, encoder, gpu_mode)
                model.train()  # eval() switched the model to inference mode

                top1_avg = eval_top1.get_average_results()
                top5_avg = eval_top5.get_average_results()

                avg_score = (top1_avg["verb"] + top1_avg["value"] + top1_avg["value-all"]
                             + top5_avg["verb"] + top5_avg["value"] + top5_avg["value-all"]
                             + top5_avg["value*"] + top5_avg["value-all*"]) / 8

                print('Dev {} average :{:.2f} {} {}'.format(
                    total_steps - 1, avg_score * 100,
                    utils.format_dict(top1_avg, '{:.2f}', '1-'),
                    utils.format_dict(top5_avg, '{:.2f}', '5-')))
                print('Dev loss :', val_loss)

                dev_score_list.append(avg_score)
                # Checkpoint only on a new best combined dev score.
                if avg_score >= max(dev_score_list):
                    checkpoint_name = os.path.join(
                        model_dir, '{}_devloss_cnngraph_{}.h5'.format(
                            'baseline', len(dev_score_list)))
                    utils.save_net(checkpoint_name, model)
                    print('New best model saved! {0}'.format(avg_score))

                print('current train loss', train_loss)
                train_loss = 0
                # Fresh scorers so the printed training averages cover only the
                # window since this eval.
                top1 = imsitu_scorer(encoder, 1, 3)
                top5 = imsitu_scorer(encoder, 5, 3)

            # Drop per-batch tensors promptly to reduce peak GPU memory.
            del verb_predict, role_predict, loss, img, verb, roles, labels

        # Per-epoch LR decay; called after this epoch's optimizer steps.
        scheduler.step()
        print('Epoch ', epoch, ' completed!')
optimizer, total_loss, freeze_core=False) train_accs.append(list(train_acc.values())) test_acc = test(epoch, testloader, total_loss) test_accs.append(list(test_acc.values())) print(F"\nLearning_rate: {scheduler.get_lr()[0]}") scheduler.step() acc_mean = sum(test_acc.values()) / len(test_acc.values()) if acc_mean > best_acc: print('Saving..') print(F"acc_mean: {acc_mean}, best_acc: {best_acc}") if not os.path.isdir(args.out): print(F"create directory {args.out}") os.mkdir(args.out) save_net(os.path.join(args.out, name_save_file), net.state_dict(), test_acc, epoch) best_acc = acc_mean print("train loss", total_loss.get_train_avgloss(epoch)) print("validate loss", total_loss.get_valid_avgloss(epoch)) print("train acc", train_acc) print("validate acc", test_acc) pd.DataFrame(train_accs, columns=trainloader.keys()).to_csv( os.path.join(args.out, 'train_accs.csv')) pd.DataFrame(test_accs, columns=trainloader.keys()).to_csv( os.path.join(args.out, 'test_accs.csv')) pd.DataFrame(data=total_loss.get_train_avglosses(), columns=trainloader.keys()).to_csv( os.path.join(args.out, 'train_losses.csv')) pd.DataFrame(data=total_loss.get_valid_avglosses(),