def train(net, optimizer):
    curr_iter = 1
    base_lr = args['lr']  # fallback so the log line below is defined when poly_train is off

    for epoch in range(args['last_epoch'] + 1, args['last_epoch'] + 1 + args['epoch_num']):
        loss_4_record, loss_3_record, loss_2_record, loss_1_record, \
            loss_f_record, loss_record = AvgMeter(), AvgMeter(), AvgMeter(), AvgMeter(), AvgMeter(), AvgMeter()

        train_iterator = tqdm(train_loader, total=len(train_loader))
        for data in train_iterator:
            if args['poly_train']:
                # Polynomial ('poly') decay over all training iterations; param
                # group 0 trains at twice the base rate, group 1 at the base rate.
                base_lr = args['lr'] * (1 - float(curr_iter) / (args['epoch_num'] * len(train_loader))) ** args['lr_decay']
                optimizer.param_groups[0]['lr'] = 2 * base_lr
                optimizer.param_groups[1]['lr'] = 1 * base_lr

            inputs, labels = data
            batch_size = inputs.size(0)
            inputs = Variable(inputs).cuda(device_ids[0])
            labels = Variable(labels).cuda(device_ids[0])

            optimizer.zero_grad()

            predict_4, predict_3, predict_2, predict_1, predict_f = net(inputs)

            # Deep supervision: each side output and the fused prediction are
            # trained against the same ground-truth mask with the Lovasz hinge.
            loss_4 = L.lovasz_hinge(predict_4, labels)
            loss_3 = L.lovasz_hinge(predict_3, labels)
            loss_2 = L.lovasz_hinge(predict_2, labels)
            loss_1 = L.lovasz_hinge(predict_1, labels)
            loss_f = L.lovasz_hinge(predict_f, labels)

            loss = loss_4 + loss_3 + loss_2 + loss_1 + loss_f

            loss.backward()
            optimizer.step()

            loss_record.update(loss.data, batch_size)
            loss_4_record.update(loss_4.data, batch_size)
            loss_3_record.update(loss_3.data, batch_size)
            loss_2_record.update(loss_2.data, batch_size)
            loss_1_record.update(loss_1.data, batch_size)
            loss_f_record.update(loss_f.data, batch_size)

            if curr_iter % 50 == 0:
                writer.add_scalar('loss', loss, curr_iter)
                writer.add_scalar('loss_4', loss_4, curr_iter)
                writer.add_scalar('loss_3', loss_3, curr_iter)
                writer.add_scalar('loss_2', loss_2, curr_iter)
                writer.add_scalar('loss_1', loss_1, curr_iter)
                writer.add_scalar('loss_f', loss_f, curr_iter)

            log = '[%3d], [%6d], [%.6f], [%.5f], [L4: %.5f], [L3: %.5f], [L2: %.5f], [L1: %.5f], [Lf: %.5f]' % \
                  (epoch, curr_iter, base_lr, loss_record.avg, loss_4_record.avg, loss_3_record.avg,
                   loss_2_record.avg, loss_1_record.avg, loss_f_record.avg)
            train_iterator.set_description(log)
            with open(log_path, 'a') as f:  # close the log file instead of leaking the handle
                f.write(log + '\n')

            curr_iter += 1

        if epoch in args['save_point']:
            net.cpu()
            torch.save(net.module.state_dict(), os.path.join(ckpt_path, exp_name, '%d.pth' % epoch))
            net.cuda(device_ids[0])

        if epoch >= args['epoch_num']:
            net.cpu()
            torch.save(net.module.state_dict(), os.path.join(ckpt_path, exp_name, '%d.pth' % epoch))
            print("Optimization has finished!")
            return
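
# For reference, the 'poly' schedule used above can be isolated into a tiny
# helper. A minimal sketch: the name poly_lr is ours, not part of this script;
# power corresponds to args['lr_decay'] and total_iters to
# args['epoch_num'] * len(train_loader).
def poly_lr(base_lr, curr_iter, total_iters, power=0.9):
    # Decays slowly at first, then falls to zero at the final iteration.
    return base_lr * (1 - float(curr_iter) / total_iters) ** power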
def train(net, optimizer):
    curr_iter = 1
    base_lr = args['lr']  # fallback so the log line below is defined when poly_train is off

    for epoch in range(args['last_epoch'] + 1, args['last_epoch'] + 1 + args['epoch_num']):
        loss_record, loss_b_record, loss_c_record, loss_o_record = \
            AvgMeter(), AvgMeter(), AvgMeter(), AvgMeter()

        train_iterator = tqdm(train_loader, total=len(train_loader))
        for data in train_iterator:
            if args['poly_train']:
                # Polynomial ('poly') decay, as in the variant above.
                base_lr = args['lr'] * (1 - float(curr_iter) / (args['epoch_num'] * len(train_loader))) ** args['lr_decay']
                optimizer.param_groups[0]['lr'] = 2 * base_lr
                optimizer.param_groups[1]['lr'] = 1 * base_lr

            inputs, labels, edges = data
            batch_size = inputs.size(0)
            inputs = Variable(inputs).cuda(device_ids[0])
            labels = Variable(labels).cuda(device_ids[0])
            edges = Variable(edges).cuda(device_ids[0])

            optimizer.zero_grad()

            predict_c, predict_b, predict_o = net(inputs)

            # The boundary head is supervised with BCE against the edge maps; the
            # coarse and output heads use the Lovasz hinge against the masks.
            loss_b = bce(predict_b, edges)
            loss_c = L.lovasz_hinge(predict_c, labels)
            loss_o = L.lovasz_hinge(predict_o, labels)

            loss = loss_b + loss_c + loss_o

            loss.backward()
            optimizer.step()

            loss_record.update(loss.data, batch_size)
            loss_b_record.update(loss_b.data, batch_size)
            loss_c_record.update(loss_c.data, batch_size)
            loss_o_record.update(loss_o.data, batch_size)

            if curr_iter % 50 == 0:
                writer.add_scalar('loss', loss, curr_iter)
                writer.add_scalar('loss_b', loss_b, curr_iter)
                writer.add_scalar('loss_c', loss_c, curr_iter)
                writer.add_scalar('loss_o', loss_o, curr_iter)

            log = '[Epoch: %2d], [Iter: %5d], [%.7f], [Sum: %.5f], [Lb: %.5f], [Lc: %.5f], [Lo: %.5f]' % \
                  (epoch, curr_iter, base_lr, loss_record.avg, loss_b_record.avg,
                   loss_c_record.avg, loss_o_record.avg)
            train_iterator.set_description(log)
            with open(log_path, 'a') as f:  # close the log file instead of leaking the handle
                f.write(log + '\n')

            curr_iter += 1

        if epoch in args['save_point']:
            net.cpu()
            torch.save(net.module.state_dict(), os.path.join(ckpt_path, exp_name, '%d.pth' % epoch))
            net.cuda(device_ids[0])

        if epoch >= args['epoch_num']:
            net.cpu()
            torch.save(net.module.state_dict(), os.path.join(ckpt_path, exp_name, '%d.pth' % epoch))
            print("Optimization has finished!")
            return
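
# bce is not defined in this snippet. Since predict_b is a raw logit map
# (lovasz_hinge, used on the other two heads, also expects logits), it is
# presumably an nn.BCEWithLogitsLoss instance; an assumption, not a
# confirmed detail of this repository:
import torch.nn as nn

bce = nn.BCEWithLogitsLoss()  # fuses sigmoid + BCE in one numerically stable op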
def train(net, optimizer):
    global best_ber
    curr_iter = 1
    base_lr = args['lr']  # fallback so the log line below is defined when poly_train is off
    start_time = time.time()

    for epoch in range(args['last_epoch'] + 1, args['last_epoch'] + 1 + args['epoch_num']):
        loss_4_record, loss_3_record, loss_2_record, loss_1_record, \
            loss_record = AvgMeter(), AvgMeter(), AvgMeter(), AvgMeter(), AvgMeter()

        train_iterator = tqdm(train_loader, total=len(train_loader))
        for data in train_iterator:
            if args['poly_train']:
                # total_epoch is defined elsewhere in the script; from the other
                # variants it is the total number of training iterations,
                # i.e. args['epoch_num'] * len(train_loader).
                base_lr = args['lr'] * (1 - float(curr_iter) / float(total_epoch)) ** args['lr_decay']
                optimizer.param_groups[0]['lr'] = 2 * base_lr
                optimizer.param_groups[1]['lr'] = 1 * base_lr

            inputs, labels = data
            batch_size = inputs.size(0)
            inputs = Variable(inputs).cuda(device_ids[0])
            labels = Variable(labels).cuda(device_ids[0])

            optimizer.zero_grad()

            predict_4, predict_3, predict_2, predict_1 = net(inputs)

            # Deep supervision with the Lovasz hinge on all four side outputs.
            loss_4 = L.lovasz_hinge(predict_4, labels)
            loss_3 = L.lovasz_hinge(predict_3, labels)
            loss_2 = L.lovasz_hinge(predict_2, labels)
            loss_1 = L.lovasz_hinge(predict_1, labels)

            loss = loss_4 + loss_3 + loss_2 + loss_1

            loss.backward()
            optimizer.step()

            loss_record.update(loss.data, batch_size)
            loss_4_record.update(loss_4.data, batch_size)
            loss_3_record.update(loss_3.data, batch_size)
            loss_2_record.update(loss_2.data, batch_size)
            loss_1_record.update(loss_1.data, batch_size)

            if curr_iter % 50 == 0:
                writer.add_scalar('loss', loss, curr_iter)
                writer.add_scalar('loss_4', loss_4, curr_iter)
                writer.add_scalar('loss_3', loss_3, curr_iter)
                writer.add_scalar('loss_2', loss_2, curr_iter)
                writer.add_scalar('loss_1', loss_1, curr_iter)

            log = '[%3d], [%6d], [%.6f], [%.5f], [L4: %.5f], [L3: %.5f], [L2: %.5f], [L1: %.5f]' % \
                  (epoch, curr_iter, base_lr, loss_record.avg, loss_4_record.avg,
                   loss_3_record.avg, loss_2_record.avg, loss_1_record.avg)
            train_iterator.set_description(log)
            with open(log_path, 'a') as f:  # close the log file instead of leaking the handle
                f.write(log + '\n')

            curr_iter += 1

        if epoch in args['save_point']:
            net.cpu()
            torch.save(net.state_dict(), os.path.join(ckpt_path, exp_name, '%d.pth' % epoch))
            net.cuda(device_ids[0])

        if epoch >= args['epoch_thres'] and epoch % 5 == 0:
            ber = test(net)
            print("mean BER at epoch %d is %.5f" % (epoch, ber))

            if ber < best_ber:
                best_ber = ber  # remember the new best so only genuine improvements are checkpointed
                net.cpu()
                torch.save(net.state_dict(),
                           os.path.join(ckpt_path, exp_name, 'epoch_%d_ber_%.2f.pth' % (epoch, ber)))
                print("The optimized epoch is %04d" % epoch)

            net = net.cuda(device_ids[0]).train()

        if epoch >= args['epoch_num']:
            net.cpu()
            torch.save(net.state_dict(), os.path.join(ckpt_path, exp_name, '%d.pth' % epoch))
            print("Total Training Time: {}".format(
                str(datetime.timedelta(seconds=int(time.time() - start_time)))))
            print(exp_name)
            print("Optimization has finished!")
            return
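
# AvgMeter is referenced throughout but defined elsewhere in the repo. A
# minimal reconstruction of the running-average bookkeeping the loops above
# rely on; an assumption about the interface (update(val, n) and .avg),
# not the repository's exact class:
class AvgMeter(object):
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.sum = 0
        self.count = 0
        self.avg = 0

    def update(self, val, n=1):
        # Weight each batch's loss by its size so .avg is a per-sample mean.
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count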