def train():
    """Run the full training / per-epoch validation loop.

    Relies on module-level globals: ``model``, ``device``, ``lr``,
    ``step_size``, ``gamma``, ``epochs``, ``trainDataloader``,
    ``valDataloader`` and the ``Accuracy`` metric helper.

    Side effects: logs loss/accuracy curves to Visdom, saves a checkpoint
    under ./modelWeights/ after every epoch, and (when ``debug`` is enabled)
    additionally checkpoints whenever the running mean loss improves.
    """
    # BCEWithLogitsLoss vs. CrossEntropyLoss reference:
    # https://blog.csdn.net/qq_22210253/article/details/85222093
    criterion = nn.BCEWithLogitsLoss()

    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=0)
    # Decay LR by `gamma` every `step_size` epochs.
    scheduler = lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

    vis = Visualizer(env='main')
    vis2 = VisdomLinePlotter(env_name='lossAndAccuracy')

    bestLoss = np.inf
    debug = False  # set True to also checkpoint on every mean-loss improvement
    for epoch in range(epochs):
        model.train()
        trainTotalLoss = 0.
        # Defined up front so the post-epoch checkpoint filename is valid even
        # when the dataloader yields fewer than 10 batches (the original only
        # assigned this inside the `% 10 == 0` branch -> possible NameError).
        meanTotalLoss = 0.
        start = time.time()
        for trainIter, (testImgTensor, labelTensor) in enumerate(trainDataloader):
            testImgTensor = testImgTensor.to(device)
            labelTensor = labelTensor.to(device)

            output = model(testImgTensor)
            acc = Accuracy(output, labelTensor)
            trainLoss = criterion(output, labelTensor)
            trainTotalLoss += trainLoss.item()

            # Clear stale gradients exactly once per step (the original called
            # zero_grad() twice, which was redundant).
            optimizer.zero_grad()
            trainLoss.backward()
            optimizer.step()

            if (trainIter + 1) % 10 == 0:
                meanTotalLoss = trainTotalLoss / (trainIter + 1)
                print(
                    'Epoch [%d/%d], Iter [%d/%d] iterLoss: %.4f, epochAccumulatedMeanLoss: %.4f, acc: %.4f%%'
                    % (epoch + 1, epochs, trainIter + 1,
                       len(trainDataloader), trainLoss.item(),
                       meanTotalLoss, acc * 100))
                # Visualize once every ten iterations.
                vis.plot_train_val(loss_train=meanTotalLoss)
                vis2.plot(var_name='loss',
                          split_name='train',
                          title_name='Loss',
                          x=epoch * len(trainDataloader) + trainIter,
                          y=meanTotalLoss)
                vis2.plot(var_name='acc',
                          split_name='train',
                          title_name='Accuracy',
                          x=epoch * len(trainDataloader) + trainIter,
                          y=acc)
                if debug and bestLoss > meanTotalLoss:
                    bestLoss = meanTotalLoss
                    print('get best test trainLoss %.5f' % bestLoss)
                    torch.save(
                        model.state_dict(),
                        './modelWeights/model-epoch-{}-itera{}-trainLoss-{}.pth'.
                        format(epoch, trainIter, meanTotalLoss))

        # FIX: the scheduler was created but never stepped, so the LR never
        # actually decayed. Step it once per epoch (StepLR counts epochs).
        scheduler.step()

        usedTime = time.time() - start
        print('This epoch uses %ds.' % usedTime)
        torch.save(
            model.state_dict(),
            './modelWeights/model-epoch-{}-itera{}-trainLoss-{:.3f}.pth'.
            format(epoch, trainIter, meanTotalLoss))

        # ---- per-epoch validation --------------------------------------
        valTotalLoss = 0.0
        model.eval()  # dropout / batch-norm layers switch to eval behaviour
        with torch.no_grad():
            for valIter, (testImgTensor, labelTensor) in enumerate(valDataloader):
                testImgTensor = testImgTensor.to(device)
                labelTensor = labelTensor.to(device)
                output = model(testImgTensor)
                acc = Accuracy(output, labelTensor)
                valLoss = criterion(output, labelTensor)
                valTotalLoss += valLoss.item()
            meanValLoss = valTotalLoss / len(valDataloader)
            print('*' * 40)
            print(
                'Epoch [%d/%d], Iter [%d/%d] thisIterLoss: %.4f, EpochAccumulatedMeanLoss: %.4f'
                % (epoch + 1, epochs, valIter + 1, len(valDataloader),
                   valLoss.item(), meanValLoss))
            print('*' * 40)
            vis.plot_train_val(loss_val=meanValLoss)
            vis2.plot(var_name='loss',
                      split_name='val',
                      title_name='Class Loss',
                      x=epoch * len(trainDataloader) + trainIter,
                      y=meanValLoss)
            vis2.plot(var_name='acc',
                      split_name='val',
                      title_name='Accuracy',
                      x=epoch * len(trainDataloader) + trainIter,
                      y=acc)
def train():
    """Train a YOLOv1 detector (modified VGG16 backbone) on VOC2012 and
    validate on the VOC2007 test set after every epoch.

    All configuration comes from the module-level ``opt`` object.  The most
    recent weights are saved every epoch; the weights with the lowest
    validation loss are kept separately.  Per-epoch validation loss is
    appended to log/log.txt.
    """
    vis = Visualizer(opt.env)

    # ---- network ---------------------------------------------------------
    # Pretrained VGG16: keep the convolutional feature extractor, replace the
    # classifier so the final layer outputs 1470 = 7 x 7 x 30 values per image
    # (the YOLOv1 grid encoding). One of VGG's FC layers is dropped on purpose.
    net = vgg16(pretrained=True)
    net.classifier = nn.Sequential(
        nn.Linear(512 * 7 * 7, 4096),
        nn.ReLU(True),
        nn.Dropout(),
        nn.Linear(4096, 1470),
    )
    # Initialise the freshly created linear layers (weights ~ N(0, 0.01), zero bias).
    for m in net.modules():
        if isinstance(m, nn.Linear):
            m.weight.data.normal_(0, 0.01)
            m.bias.data.zero_()
    # Optionally resume from a checkpoint; load onto CPU first, then move to GPU.
    if opt.load_model_path:
        net.load_state_dict(
            torch.load(opt.load_model_path,
                       map_location=lambda storage, loc: storage))
    if opt.use_gpu:
        net.cuda()
    print(net)
    print('加载好预先训练好的模型')

    # ---- data ------------------------------------------------------------
    train_dataset = yoloDataset(root=opt.file_root,
                                list_file=opt.voc_2012train,
                                train=True,
                                transform=[transforms.ToTensor()])
    train_loader = DataLoader(train_dataset,
                              batch_size=opt.batch_size,
                              shuffle=True,
                              num_workers=4)
    test_dataset = yoloDataset(root=opt.test_root,
                               list_file=opt.voc_2007test,
                               train=False,
                               transform=[transforms.ToTensor()])
    test_loader = DataLoader(test_dataset,
                             batch_size=opt.batch_size,
                             shuffle=False,
                             num_workers=4)

    # yoloLoss(S, B, l_coord, l_noobj): 7x7 grid, 2 boxes per cell,
    # coordinate-loss weight 5, no-object confidence weight 0.5 (paper values).
    criterion = yoloLoss(7, 2, 5, 0.5)
    optimizer = torch.optim.SGD(net.parameters(),
                                lr=opt.learning_rate,
                                momentum=opt.momentum,
                                weight_decay=opt.weight_decay)

    print('训练集有 %d 张图像' % (len(train_dataset)))
    print('一个batch的大小为 %d' % (opt.batch_size))

    # Hand-tuned LR schedule: warm up over the first epochs, then decay late.
    lr_schedule = {1: 0.0005, 2: 0.00075, 3: 0.001, 80: 0.0001, 100: 0.00001}

    logfile = open('log/log.txt', 'w')
    best_test_loss = np.inf
    try:
        for epoch in range(opt.num_epochs):
            # FIX: re-enable training mode every epoch. Validation below ends
            # with net.eval(); without this, every epoch after the first would
            # train with dropout disabled.
            net.train()

            if epoch in lr_schedule:
                opt.learning_rate = lr_schedule[epoch]
            for param_group in optimizer.param_groups:
                param_group['lr'] = opt.learning_rate

            print('\n\n当前的epoch为 %d / %d' % (epoch + 1, opt.num_epochs))
            print('当前epoch的学习率: {}'.format(opt.learning_rate))

            total_loss = 0.
            for i, (images, target) in enumerate(train_loader):
                if opt.use_gpu:
                    images, target = images.cuda(), target.cuda()

                # Forward pass; yoloLoss is an nn.Module, so calling it runs
                # its forward() method.
                pred = net(images)
                loss = criterion(pred, target)
                # FIX: loss.data[0] was removed in PyTorch >= 0.5; use .item().
                total_loss += loss.item()

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                if (i + 1) % opt.print_freq == 0:
                    print(
                        '在训练集上:当前epoch为 [%d/%d], Iter [%d/%d] 当前batch损失为: %.4f, 当前epoch到目前为止平均损失为: %.4f'
                        % (epoch + 1, opt.num_epochs, i + 1,
                           len(train_loader), loss.item(),
                           total_loss / (i + 1)))
                    # Plot the running mean training loss.
                    vis.plot_train_val(loss_train=total_loss / (i + 1))

            # Always keep the most recent weights.
            torch.save(net.state_dict(), opt.current_epoch_model_path)

            # ---- validation on the VOC2007 test set -----------------------
            validation_loss = 0.0
            net.eval()
            # FIX: Variable(..., volatile=True) is deprecated and a no-op in
            # modern PyTorch; torch.no_grad() actually disables autograd here.
            with torch.no_grad():
                for i, (images, target) in enumerate(test_loader):
                    if opt.use_gpu:
                        images, target = images.cuda(), target.cuda()
                    pred = net(images)
                    loss = criterion(pred, target)
                    validation_loss += loss.item()
            validation_loss /= len(test_loader)
            vis.plot_train_val(loss_val=validation_loss)

            # The training objective is the minimum validation loss; keep the
            # best checkpoint seen so far.
            if best_test_loss > validation_loss:
                best_test_loss = validation_loss
                print('当前得到最好的验证集的平均损失为 %.5f' % best_test_loss)
                torch.save(net.state_dict(), opt.best_test_loss_model_path)

            # Append this epoch's validation loss to the log file.
            logfile.writelines(str(epoch) + '\t' + str(validation_loss) + '\n')
            logfile.flush()
    finally:
        logfile.close()  # FIX: the log file was opened but never closed