def train_epoch(net, epoch, dataLoader, optimizer, trainF, config):
    net.train()
    total_mask_loss = 0.0
    dataprocess = tqdm(dataLoader)
    for batch_item in dataprocess:
        image, mask = batch_item['image'], batch_item['mask']
        if torch.cuda.is_available():
            image, mask = image.cuda(device=device_list[0]), mask.cuda(device=device_list[0])
        # optimizer.zero_grad() clears the gradient of every parameter
        optimizer.zero_grad()
        # forward pass: predict the mask
        out = net(image)
        # compute the cross-entropy loss
        mask_loss = MySoftmaxCrossEntropyLoss(nbclasses=config.NUM_CLASSES)(out, mask)
        total_mask_loss += mask_loss.item()
        mask_loss.backward()
        optimizer.step()
        dataprocess.set_description_str("epoch:{}".format(epoch))
        dataprocess.set_postfix_str("mask_loss:{:.4f}".format(mask_loss.item()))
    # write the epoch's average loss to the training log file
    trainF.write("Epoch:{}, mask loss is {:.4f} \n".format(epoch, total_mask_loss / len(dataLoader)))
    trainF.flush()
def train_epoch(net, epoch, dataLoader, optimizer, trainF, config):
    # Put the model in training mode: dropout randomly zeroes activations and
    # batch-norm layers normalize with the statistics of the current batch.
    # net.eval() is the counterpart used at inference time: dropout keeps all
    # units (scaled by the compensation factor) and batch-norm uses the running
    # averages accumulated during training.
    net.train()
    total_mask_loss = 0.0
    dataprocess = tqdm(dataLoader)
    for batch_item in dataprocess:
        image, mask = batch_item['image'], batch_item['mask']
        if torch.cuda.is_available():
            image, mask = image.cuda(device=device_list[0]), mask.cuda(device=device_list[0])
        # clear the gradient of every parameter
        optimizer.zero_grad()
        # forward pass: predict the mask
        out = net(image)
        # per-class softmax cross-entropy loss
        mask_loss = MySoftmaxCrossEntropyLoss(nbclasses=config.NUM_CLASSES)(out, mask)
        # accumulate the total loss of this epoch
        total_mask_loss += mask_loss.item()
        # back-propagate
        mask_loss.backward()
        # update the parameters
        optimizer.step()
        dataprocess.set_description_str("epoch:{}".format(epoch))
        dataprocess.set_postfix_str("mask_loss:{:.4f}".format(mask_loss.item()))
    # write the epoch's average loss to the training log file
    trainF.write("Epoch:{}, mask loss is {:.4f} \n".format(epoch, total_mask_loss / len(dataLoader)))
    trainF.flush()
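# Every train_epoch variant in this file calls a MySoftmaxCrossEntropyLoss that is
# defined elsewhere in the project. For reference, a minimal sketch of such a loss
# is given below; it assumes the logits already match the label resolution and simply
# flattens the pixels and applies nn.CrossEntropyLoss, which may differ from the
# project's actual implementation.
import torch
import torch.nn as nn


class MySoftmaxCrossEntropyLoss(nn.Module):
    """Softmax cross-entropy over nbclasses classes for dense prediction (sketch)."""

    def __init__(self, nbclasses):
        super(MySoftmaxCrossEntropyLoss, self).__init__()
        self.nbclasses = nbclasses
        self.criterion = nn.CrossEntropyLoss(reduction="mean")

    def forward(self, inputs, target):
        if inputs.dim() == 4:  # N, C, H, W logits
            # flatten the spatial dimensions so every pixel is one classification sample
            inputs = inputs.permute(0, 2, 3, 1).contiguous().view(-1, self.nbclasses)
            target = target.view(-1)
        return self.criterion(inputs, target.long())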
def train_epoch(net, epoch, dataLoader, optimizer, trainF, config):
    net.train()
    total_mask_loss = 0.0
    dataprocess = tqdm(dataLoader)
    # gradients are accumulated over several batches before each optimizer step
    accumulation_steps = 8
    # GridMask data augmentation applied to the input images
    grid = GridMask(10, 30, 360, 0.6, 1, 0.8)
    optimizer.zero_grad()
    for i, batch_item in enumerate(dataprocess):
        # ramp up the GridMask application probability as training progresses
        grid.set_prob(i, 200)
        image, mask = batch_item['image'], batch_item['mask']
        if torch.cuda.is_available():
            image, mask = image.cuda(device=device_list[0]), mask.cuda(device=device_list[0])
        image = grid(image)
        out = net(image)
        mask_loss = MySoftmaxCrossEntropyLoss(nbclasses=config.NUM_CLASSES)(out, mask)
        total_mask_loss += mask_loss.item()
        # scale the loss so the accumulated gradient matches the mean over the large batch
        (mask_loss / accumulation_steps).backward()
        if (i + 1) % accumulation_steps == 0:
            # clip the accumulated gradient right before the update
            torch.nn.utils.clip_grad_norm_(net.parameters(), 0.25)
            optimizer.step()       # update the parameters
            optimizer.zero_grad()  # clear the accumulated gradients
        dataprocess.set_description_str("epoch:{}".format(epoch))
        dataprocess.set_postfix_str("mask_loss:{:.4f}".format(mask_loss.item()))
    trainF.write("Epoch:{}, mask loss is {:.4f} \n".format(epoch, total_mask_loss / len(dataLoader)))
    trainF.flush()
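# The accumulation logic above is equivalent to training with a batch that is
# accumulation_steps times larger. The tiny, self-contained sketch below (the linear
# model and random data are hypothetical, not from this project) checks that summing
# gradients of loss / accumulation_steps over k small batches reproduces the gradient
# of one large batch.
import torch
import torch.nn as nn

torch.manual_seed(0)
model = nn.Linear(4, 2)
x, y = torch.randn(8, 4), torch.randint(0, 2, (8,))
criterion = nn.CrossEntropyLoss()

# gradient of the full batch of 8 samples
model.zero_grad()
criterion(model(x), y).backward()
full_grad = model.weight.grad.clone()

# accumulated gradient over 4 mini-batches of 2 samples each
model.zero_grad()
for xb, yb in zip(x.split(2), y.split(2)):
    (criterion(model(xb), yb) / 4).backward()
print(torch.allclose(full_grad, model.weight.grad, atol=1e-6))  # True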
def train_epoch(net, epoch, dataloader, optimizer, writer, logger, config):
    net.train()
    confusion_matrix = np.zeros((config.NUM_CLASS, config.NUM_CLASS))
    total_mask_loss = 0.0
    dataprocess = tqdm(dataloader)
    logger.info("Train Epoch {}:".format(epoch))
    for batch_item in dataprocess:
        image, mask = batch_item['image'], batch_item['mask']
        if torch.cuda.is_available():
            image, mask = image.cuda(), mask.cuda()
        optimizer.zero_grad()
        out = net(image)
        mask_loss = MySoftmaxCrossEntropyLoss(nbclasses=config.NUM_CLASS)(out, mask)
        # accumulate the per-batch confusion matrix for the IoU statistics
        confusion_matrix += get_confusion_matrix(mask, out, mask.size(), config.NUM_CLASS)
        total_mask_loss += mask_loss.item()
        mask_loss.backward()
        optimizer.step()
        dataprocess.set_description('epoch:{}'.format(epoch))
        dataprocess.set_postfix_str('mask loss:{:.4f}'.format(mask_loss.item()))
    logger.info("\taverage loss is : {:.3f}".format(total_mask_loss / len(dataloader)))
    # per-class IoU from the accumulated confusion matrix
    pos = confusion_matrix.sum(0)
    res = confusion_matrix.sum(1)
    tp = np.diag(confusion_matrix)
    IoU_array = tp / np.maximum(1.0, pos + res - tp)
    for i in range(config.NUM_CLASS):
        logger.info("\t{} iou is : {:.4f}".format(i, IoU_array[i]))
    # mean IoU over the foreground classes (class 0 is background)
    miou = IoU_array[1:].mean()
    logger.info("Train mIoU is : {:.4f}".format(miou))
    # log scalars directly; entering the writer as a context manager would close it
    # after this epoch and break later calls
    writer.add_scalar('EPOCH Loss', total_mask_loss / len(dataloader), epoch)
    writer.add_scalar('Train miou', miou, epoch)
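# The IoU computation above follows the standard confusion-matrix identity
# IoU_c = TP_c / (TP_c + FP_c + FN_c). A small hand-made example (the numbers are
# illustrative only, not project data) shows the same pos/res/tp formula in isolation:
import numpy as np

# rows = ground-truth class, columns = predicted class
confusion_matrix = np.array([[50,  2,  3],
                             [ 4, 30,  1],
                             [ 6,  0, 20]], dtype=np.float64)
pos = confusion_matrix.sum(0)           # pixels predicted as each class (TP + FP)
res = confusion_matrix.sum(1)           # ground-truth pixels of each class (TP + FN)
tp = np.diag(confusion_matrix)          # correctly classified pixels per class
IoU_array = tp / np.maximum(1.0, pos + res - tp)
print(IoU_array)                        # per-class IoU
print(IoU_array[1:].mean())             # mIoU over foreground classes, as in train_epoch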
def train_epoch(net, epoch, dataLoader, optimizer, trainF, args):
    logger.info("======start training epoch step=======")
    net.train()
    total_mask_loss = 0.0
    dataprocess = tqdm(dataLoader)
    for batch_item in dataprocess:
        image, mask = batch_item['image'], batch_item['mask']
        optimizer.zero_grad()
        out = net(image)
        logger.info("train predict shape: {}".format(out.shape))
        mask_loss = MySoftmaxCrossEntropyLoss(nbclasses=args.number_class)(out, mask)
        total_mask_loss += mask_loss.item()
        mask_loss.backward()
        # update the parameters with the optimizer
        optimizer.step()
        dataprocess.set_description_str("epoch:{}".format(epoch))
        dataprocess.set_postfix_str("mask_loss:{:.4f}".format(mask_loss.item()))
    # write the epoch's average loss to the training log file
    trainF.write("Epoch:{}, mask loss is {:.4f} \n".format(epoch, total_mask_loss / len(dataLoader)))
    trainF.flush()
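# A throwaway smoke test for the last train_epoch variant. Everything below except
# train_epoch and MySoftmaxCrossEntropyLoss (assumed defined in this module, e.g. the
# sketch above) is hypothetical: RandomSegDataset, the 1x1-conv stand-in network and
# the log-file name are placeholders used only to exercise the loop on random CPU data
# before launching a long run.
import argparse
import logging
import sys
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)


class RandomSegDataset(Dataset):
    """Yields random image/mask pairs shaped like the real samples."""

    def __init__(self, length=8, num_classes=8, size=(64, 64)):
        self.length, self.num_classes, self.size = length, num_classes, size

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        h, w = self.size
        return {'image': torch.randn(3, h, w),
                'mask': torch.randint(0, self.num_classes, (h, w))}


if __name__ == "__main__":
    args = argparse.Namespace(number_class=8)
    net = nn.Conv2d(3, args.number_class, kernel_size=1)  # stand-in for the real network
    loader = DataLoader(RandomSegDataset(num_classes=args.number_class), batch_size=2)
    optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
    with open("smoke_train.log", "w") as trainF:
        train_epoch(net, 0, loader, optimizer, trainF, args)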