def train(**kwargs):
    """Train a model on the DogCat dataset.

    Keyword args are forwarded to ``opt.parse`` and override the default
    configuration (model name, data paths, hyper-parameters, ...).
    """
    # Update the configuration from the command-line arguments.
    opt.parse(kwargs)
    # Visualization (disabled in the original).
    # vis = Visualizer(opt.env)

    # step1: build the model, optionally resuming trained parameters.
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)  # load pretrained weights
    if opt.use_gpu:
        model.cuda()

    # step2: data. NOTE(review): the validation split comes from the same
    # root with train=False -- presumably DogCat splits internally; confirm.
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    # BUGFIX: validation data should not be shuffled.
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=False, num_workers=opt.num_workers)

    # step3: objective and optimizer.
    criterion = torch.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                 weight_decay=opt.weight_decay)

    # Statistics (smoothed loss, confusion matrix) -- still TODO as before.

    # train
    for epoch in range(opt.max_epoch):
        for ii, (data, label) in enumerate(train_dataloader):
            # BUGFIX: the original read ``input``/``target`` from the wrong
            # variables (``input = input.cuda(); target = input.cuda()``),
            # which raised UnboundLocalError and aliased target to input,
            # and left both unbound on the CPU path.
            input, target = data, label
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # Update statistics / visualize.
            if ii % opt.print_freq == opt.print_freq - 1:
                print('ii:{},loss:{}'.format(ii, loss))

        # Checkpoint after every epoch.
        model.save()
def test(**kwargs):
    """Run inference on the test set and write (path, probability) rows
    to ``opt.results_file``."""
    opt.parse(kwargs)
    # BUGFIX: removed the leftover unconditional ``ipdb.set_trace()`` which
    # halted every run in the debugger.

    # configure model (eval mode freezes dropout / batch-norm statistics)
    model = getattr(models, opt.model)().eval()
    if os.path.exists(opt.load_model_path):
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # data (no shuffling for inference)
    test_data = DogCat(opt.test_data_root, test=True)
    test_dataloader = DataLoader(test_data, batch_size=opt.batch_size,
                                 shuffle=False, num_workers=opt.num_workers)
    results = []
    for ii, (data, path) in enumerate(test_dataloader):
        input = Variable(data, volatile=True)
        # BUGFIX: was ``opt.user_gpu`` (AttributeError); the flag is use_gpu.
        if opt.use_gpu:
            input = input.cuda()
        score = model(input)
        # dim=1: softmax over the class dimension; column 0 is presumably
        # the "dog" class -- TODO confirm against the label encoding.
        probability = F.softmax(score, dim=1)[:, 0].data.tolist()
        batch_results = [(path_, probability_)
                         for path_, probability_ in zip(path, probability)]
        results += batch_results

    write_csv(results, opt.results_file)
def test():
    """Evaluate the configured model on the DogCat test split and dump
    the (id, dog-probability) pairs to a CSV file."""
    # Build the network named in the config and move it to the target device.
    net = getattr(models, opt.model)()
    net.to(opt.device)

    # Restore the trained weights.
    net.load_state_dict(t.load(opt.model_path))

    # Test-split dataset and loader (deterministic order, no shuffling).
    dataset = DogCat(root=opt.test_root, train=False, test=True)
    loader = DataLoader(dataset,
                        batch_size=opt.batch_size,
                        shuffle=False,
                        num_workers=opt.num_works)

    records = []
    for idx, (batch, ids) in tqdm(enumerate(loader)):
        inputs = batch.to(opt.device)
        logits = net(inputs)
        # Probability of class 0 ("dog" per the original comment).
        probs = softmax(logits, dim=1)[:, 0].detach().tolist()
        records.extend((id_.item(), p) for id_, p in zip(ids, probs))

    write_csv(records)
def test(**kwargs):
    """Run inference on the test set, write (path, probability) rows to
    ``opt.result_file`` and return them."""
    opt.parse(kwargs)

    # Model in eval mode (freezes dropout / batch-norm statistics).
    model = getattr(models, opt.model)().eval()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        # BUGFIX: the original did ``model = opt.use_gpu``, replacing the
        # network with a bool and crashing at ``model(input)``.
        model.cuda()

    test_data = DogCat(opt.test_data_root, test=True)
    # BUGFIX: do not shuffle test data; inference order should be stable.
    test_dataloader = DataLoader(test_data, batch_size=opt.batch_size,
                                 shuffle=False, num_workers=opt.num_workers)
    result = []
    for ii, (data, path) in enumerate(test_dataloader):
        input = Variable(data, volatile=True)
        if opt.use_gpu:
            input = input.cuda()
        score = model(input)
        # dim=1: softmax across classes; column 1 is presumably the positive
        # class -- TODO confirm against the label encoding.
        probability = t.nn.functional.softmax(score, dim=1)[:, 1].data.tolist()
        result += [(path_, prob_) for path_, prob_ in zip(path, probability)]

    write_csv(result, opt.result_file)
    return result
def train():
    """Train the configured model on DogCat and save its state dict."""
    # 1. Build the model named by the config and move it to the GPU.
    # NOTE(review): the model goes to .cuda() while inputs use opt.device;
    # these must agree -- confirm opt.device is a CUDA device.
    model = getattr(models, opt.model)()
    model.cuda()

    # 2. Batched train / validation data.
    train_data = DogCat(root=opt.train_root, train=True)
    val_data = DogCat(root=opt.train_root, train=False)
    train_dataloader = DataLoader(train_data, batch_size=opt.batch_size,
                                  shuffle=True, num_workers=opt.num_works)
    val_dataloader = DataLoader(val_data, batch_size=opt.batch_size,
                                shuffle=False, num_workers=opt.num_works)

    # 3. Meters, loss, optimizer and TensorBoard writer.
    loss_meter = meter.AverageValueMeter()
    confuse_metrix = meter.ConfusionMeter(2)
    criterion = nn.CrossEntropyLoss()
    optimizer = t.optim.SGD(model.parameters(), lr=opt.lr, momentum=0.9)
    # optimizer = t.optim.Adam(model.parameters(), lr=opt.lr, weight_decay=opt.weight_decay)
    writer = SummaryWriter()

    global_step = 0
    for epoch in range(opt.epoch_nums):
        # 4. Run one epoch over the batched data.
        print("Epoch :", epoch)
        loss_meter.reset()
        confuse_metrix.reset()
        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            input1 = data.to(opt.device)
            target = label.to(opt.device)

            optimizer.zero_grad()  # clear accumulated gradients
            score = model(input1)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # Bookkeeping.
            confuse_metrix.add(score.detach(), target.detach())
            loss_meter.add(loss.item())
            global_step += 1
            if (ii + 1) % 20 == 0:
                print("loss_meter", loss_meter.value()[0])
                # BUGFIX: supply a global_step, otherwise TensorBoard keeps
                # overwriting a single point instead of drawing a curve.
                writer.add_scalar("Loss", loss_meter.value()[0], global_step)

        if epoch == 50:
            opt.lr = 0.001
            # BUGFIX: mutating ``opt.lr`` alone never reached the optimizer;
            # the new learning rate must be written into its param groups.
            for param_group in optimizer.param_groups:
                param_group['lr'] = opt.lr

    # 5. Persist the trained weights.
    writer.close()
    t.save(model.state_dict(), opt.model_path)
def train():
    """Train ResNet34 on the DogCat dataset, logging loss and confusion
    statistics and running validation after every epoch."""
    img_dir = '/opt/data/DogCat/train'
    model = getattr(models, 'ResNet34')(num_classes)
    print(summary(model, (3, 224, 224), device='cpu'))
    print('*' * 30)
    # BUGFIX: the inputs are moved to the GPU below but the model never was,
    # which raises a device-mismatch error on the first forward pass.
    if use_cuda:
        model = model.cuda()

    train_data = DogCat(img_dir, train=True)
    val_data = DogCat(img_dir, train=False)
    train_dataloader = DataLoader(train_data, 32, shuffle=True, num_workers=0)
    val_dataloader = DataLoader(val_data, 16, shuffle=False, num_workers=0)

    criterion = nn.CrossEntropyLoss()
    lr = 5e-3
    weight_decay = 1e-5
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)

    for epoch in range(20):
        # BUGFIX: reset the meters each epoch, otherwise the reported loss
        # and confusion matrix accumulate over the whole run.
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in enumerate(train_dataloader):
            input = Variable(data)
            target = Variable(label)
            if use_cuda:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            output = model(input)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.item())
            # BUGFIX: ``train_cm`` was logged below but the confusion matrix
            # was never updated anywhere.
            confusion_matrix.add(output.detach(), target.detach())
            if ii % 100 == 0:
                print('Epoch:{},loss:{}'.format(epoch, loss_meter.value()[0]))

        val_cm, val_accuracy = val(model, val_dataloader)
        print(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm},val_acc:{val_acc}"
            .format(epoch=epoch, loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr, val_acc=val_accuracy))
def __init__(self, root, filter_nums):
    """Set up train/val data loaders, the ConvNet model, its optimizer
    and a TensorBoard summary writer.

    root: dataset root directory passed to DogCat.
    filter_nums: number of filters for the model's first conv layer.
    """
    # Training data: light augmentation (small random rotation).
    self.train_data = DataLoader(
        DogCat(path=root, is_train=True,
               transforms=transforms.Compose([
                   transforms.RandomRotation(10),
                   transforms.ToTensor()
               ])),
        batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
    # Validation data: no augmentation.
    # BUGFIX: validation should not be shuffled -- evaluation order must be
    # deterministic and shuffling buys nothing here.
    self.val_data = DataLoader(
        DogCat(path=root, is_train=False, transforms=transforms.ToTensor()),
        batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
    self.model = ConvNet(layer1_filter=filter_nums).to(DEVICE)
    self.opt = optim.Adam(self.model.parameters())
    self.summary = SummaryWriter('./logs')
def test(**kwargs):
    """Score the test set with the configured model and write the
    (path, probability) rows to ``opt.result_file``; returns the rows."""
    opt.parse(kwargs)

    # Build the model in eval mode (freezes dropout / batch-norm stats).
    model = getattr(models, opt.model)().eval()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        # move the model onto the GPU
        model.cuda()

    # Test data; no shuffling for inference.
    test_data = DogCat(opt.test_data_root, test=True)
    test_dataloader = DataLoader(test_data, batch_size=opt.batch_size,
                                 shuffle=False, num_workers=opt.num_workers)
    results = []
    for ii, (data, path) in tqdm(enumerate(test_dataloader)):
        # BUGFIX: ``Variable(data, volatile=True)`` is a no-op on modern
        # PyTorch, so gradients were being tracked during inference; use
        # the no_grad context instead.
        with t.no_grad():
            input = data
            if opt.use_gpu:
                input = input.cuda()
            score = model(input)
            # dim=1: softmax over the class dimension; column 0 is presumably
            # the "dog" class -- TODO confirm against the label encoding.
            probability = t.nn.functional.softmax(score, dim=1)[:, 0].data.tolist()

        # Drop into the debugger when the debug flag file exists.
        if os.path.exists(opt.debug_file):
            import ipdb
            ipdb.set_trace()

        # zip pairs each sample path with its probability.
        results += [(path_, probability_)
                    for path_, probability_ in zip(path, probability)]

    # Write the results to the CSV named by opt.result_file.
    write_csv(results, opt.result_file)
    return results
def train(**kwargs):
    """Train the configured model, checkpointing and logging running
    statistics to the persisted dict at ``opt.pars_path``."""
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: configure model
    model = getattr(models, opt.model)()
    if os.path.exists(opt.load_model_path):
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # Restore the running-stats dict (and the last recorded loss) if present.
    if os.path.exists(opt.pars_path):
        dic = load_dict(opt.pars_path)
        previous_loss = dic['loss'][-1] if 'loss' in dic.keys() else 1e100
    else:
        dic = {}
        # BUGFIX: previous_loss was only assigned in the branch above, so a
        # fresh run (no pars file) crashed with NameError at the lr-decay check.
        previous_loss = 1e100

    # step2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=False, num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                 weight_decay=opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)

    # train
    # NOTE(review): the epoch range starts at 5, presumably to resume a
    # previous 5-epoch run -- confirm this offset is still wanted.
    for epoch in range(5, opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader),
                                      total=len(train_dataloader)):
            # train model
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update and visualize
            loss_meter.add(loss.data.item())
            confusion_matrix.add(score.data, target.data)

            if ii % opt.print_freq == opt.print_freq - 1:
                dic = save_dict(opt.pars_path, dic,
                                loss_data=loss_meter.value()[0])
                vis.plot('loss', dic['loss_data'])
                name = model.save()

            if os.path.exists(opt.debug_file):
                import ipdb
                # BUGFIX: was ``ipdb.set_trave()`` (AttributeError).
                ipdb.set_trace()

        # Checkpoint at the end of every epoch.
        name = model.save()

        # update learning rate: decay when the loss stops decreasing.
        # Writing the new lr into param_groups preserves optimizer state
        # (momentum etc.) instead of rebuilding the optimizer.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]

        dic = save_dict(opt.pars_path, dic, name=name, epoch=epoch, lr=lr,
                        loss=loss_meter.value()[0],
                        train_cm=confusion_matrix.value())

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)
        dic = save_dict(opt.pars_path, dic, val_accuracy=val_accuracy,
                        val_cm=val_cm.value())
        vis.log(dic)
def train(**kwargs):
    """Train the configured model on DogCat, decaying the learning rate
    whenever the epoch-average loss stops improving."""
    opt.parse(kwargs)

    # Model (optionally resumed from a checkpoint, optionally on GPU).
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # Train / validation splits from the same data root.
    train_set = DogCat(opt.train_data_root, train=True)
    val_set = DogCat(opt.train_data_root, train=False)
    train_loader = DataLoader(train_set, opt.batch_size,
                              shuffle=True, num_workers=opt.num_workers)
    val_loader = DataLoader(val_set, opt.batch_size,
                            shuffle=False, num_workers=opt.num_workers)

    # Objective and optimizer.
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(), lr=lr,
                             weight_decay=opt.weight_decay)

    # Running statistics.
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for batch_idx, (data, label) in enumerate(train_loader):
            inputs = Variable(data)
            targets = Variable(label)
            if opt.use_gpu:
                inputs = inputs.cuda()
                targets = targets.cuda()

            optimizer.zero_grad()
            scores = model(inputs)
            loss = criterion(scores, targets)
            loss.backward()
            optimizer.step()

            # Update meters and report the per-batch loss.
            batch_loss = loss.item()
            loss_meter.add(batch_loss)
            confusion_matrix.add(scores.data, targets.data)
            print('[%d ,%5d] loss=%.3f' % (epoch + 1, batch_idx + 1, batch_loss))

            # Drop into the debugger when the debug flag file exists.
            if os.path.exists(opt.debug_file):
                import ipdb
                ipdb.set_trace()

        print("***finished %d epoch***\n" % epoch)
        model.save(counter=epoch)

        val_cm, val_accuracy = val(model, val_loader)

        # Decay the learning rate when the epoch loss stopped decreasing;
        # writing into param_groups keeps optimizer momentum state intact.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            print('learn:%.4f\n' % lr)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train the model chosen by ``opt.model``, visualising loss/accuracy
    in Visdom and decaying the learning rate when the loss stops improving.

    Keyword args override the default configuration via ``opt.parse``.
    """
    # Apply command-line overrides to the configuration.
    opt.parse(kwargs)
    # Visdom visualizer on env=opt.env (default port 8097, localhost).
    vis = Visualizer(opt.env)

    # Step 1: model. getattr picks the network class named by opt.model;
    # the trailing () instantiates it.
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        # Resume from a pretrained checkpoint.
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # Step 2: data loaders for the train / validation splits.
    train_data = DogCat(opt.train_data_root, train=True)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_data = DogCat(opt.train_data_root, train=False)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=False, num_workers=opt.num_workers)

    # Step 3: loss function and optimizer.
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(), lr=lr,
                             weight_decay=opt.weight_decay)

    # Step 4: meters -- smoothed average loss and a 2-class confusion matrix.
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    for epoch in range(opt.max_epoch):
        # Reset the running statistics at the start of each epoch.
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # BUGFIX: ``loss.data[0]`` raises on PyTorch >= 0.5 (0-dim
            # tensors are no longer indexable); use ``loss.item()``.
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)

            # Every print_freq batches, plot the smoothed loss.
            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

                # Enter debug mode when the flag file exists.
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        # Checkpoint the model after every epoch.
        model.save()

        # Validation metrics and logging.
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},\n lr:{lr},\n loss:{loss},\n train_cm:{train_cm},\n val_cm:{val_cm}"
            .format(epoch=epoch, loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()), lr=lr))

        # If the loss went up, the learning rate is too high -- decay it.
        # Writing into param_groups preserves the optimizer's moment state.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]