def train_lenet(device, dataset_path):
    """Train a 35-class LeNet on the dataset at *dataset_path*.

    A checkpoint is written under models/ after every epoch, loss curves are
    plotted at the end, and the model is finally scored on the test split.
    """
    train_loader, valid_loader, test_loader = get_data_loaders(dataset_path)

    # Build and initialize the network before moving it to the target device.
    model = LeNet(35)
    model.apply(weight_init)
    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=Consts.lr,
                           weight_decay=Consts.weight_decay)
    loss_criterion = torch.nn.NLLLoss()

    # Per-epoch metric histories.
    loss_hist, vloss_hist, vacc_hist = [], [], []

    for epoch in range(Consts.epochs):
        t_loss = train(model, train_loader, optimizer, loss_criterion, device)
        v_loss, v_acc = evaluation(model, valid_loader, loss_criterion, device)

        # Checkpoint every epoch so the best one can be picked afterwards.
        torch.save(model.state_dict(), f'models/epoch-{epoch + 1}.pth')

        loss_hist.append(t_loss)
        vloss_hist.append(v_loss)
        vacc_hist.append(v_acc)

        print(f'train loss in epoch {epoch + 1} is: {t_loss}')
        print(f'validation loss in epoch {epoch + 1} is: {v_loss}')
        print(f'validation accuracy in epoch {epoch + 1} is: {v_acc}')

    plot_loss(loss_hist, vloss_hist, vacc_hist)
    test_loss, test_acc = test_model(model, test_loader, loss_criterion,
                                     vloss_hist, device, 'models/')
def main():
    """Train LeNet on CIFAR-10 for 5 epochs, reporting validation accuracy
    every 500 mini-batches, then save the weights to ./Lenet.pth.

    Fix vs. original: DataLoader iterators no longer expose a ``.next()``
    method (removed in PyTorch 1.x); the builtin ``next()`` is used instead.
    """
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    # 50000 training images.
    # Set download=True on first use to fetch the dataset automatically.
    train_set = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=False,
                                             transform=transform)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=36,
                                               shuffle=True, num_workers=0)

    # 10000 validation images, loaded as a single batch of 5000.
    # Set download=True on first use to fetch the dataset automatically.
    val_set = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=False,
                                           transform=transform)
    val_loader = torch.utils.data.DataLoader(val_set, batch_size=5000,
                                             shuffle=False, num_workers=0)
    val_data_iter = iter(val_loader)
    # FIX: was val_data_iter.next(), which raises AttributeError on
    # modern PyTorch; builtin next() works on every version.
    val_image, val_label = next(val_data_iter)

    net = LeNet()
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    for epoch in range(5):  # loop over the dataset multiple times
        running_loss = 0.0
        for step, data in enumerate(train_loader, start=0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = net(inputs)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if step % 500 == 499:  # print every 500 mini-batches
                with torch.no_grad():
                    outputs = net(val_image)  # [batch, 10]
                    predict_y = torch.max(outputs, dim=1)[1]
                    accuracy = (predict_y == val_label).sum().item() / val_label.size(0)
                    print('[%d, %5d] train_loss: %.3f test_accuracy: %.3f' %
                          (epoch + 1, step + 1, running_loss / 500, accuracy))
                    running_loss = 0.0

    print('Finished Training')

    save_path = './Lenet.pth'
    torch.save(net.state_dict(), save_path)
def trainModel(EPOCH_NUM=50, save=False, show=False):
    """Train LeNet with RMSprop for EPOCH_NUM epochs.

    Args:
        EPOCH_NUM: number of epochs to run.
        save: if truthy, write the trained weights to MNIST_Model.pth.
        show: if truthy, plot the per-batch loss curve after each epoch.

    Relies on module-level globals: ``devices``, ``LR``, ``trainLoader``,
    ``trainData``.

    Fixes vs. original: the batch counter no longer shadows the builtin
    ``iter``; ``== True`` comparisons replaced with truthiness; the accuracy
    accumulator is a plain int (``.item()``) rather than a 0-dim tensor.
    """
    print("Train Start:")
    net = LeNet().to(devices)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(net.parameters(), lr=LR, alpha=0.9,
                                    eps=1e-08, weight_decay=0, momentum=0,
                                    centered=False)
    batch, batchloss = [], []
    for epoch in range(EPOCH_NUM):
        sum_loss = 0.0
        acc = 0
        num_batches = 0  # was `iter`, which shadowed the builtin
        for i, (inputs, labels) in enumerate(trainLoader):
            inputs, labels = inputs.to(devices), labels.to(devices)
            # forward and backward
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            sum_loss += loss.item()
            _, pred = torch.max(outputs.data, 1)
            # .item() keeps `acc` an int instead of accumulating tensors
            acc += (pred == labels).sum().item()
            num_batches += 1
            batch.append(i)
            batchloss.append(loss.item())

        if show:
            # NOTE: `batch`/`batchloss` accumulate across epochs, so the
            # curve grows each time it is shown (original behavior kept).
            plt.figure()
            plt.plot(batch, batchloss, 'b')
            plt.title('one epoch')
            plt.xlabel('iteration')
            plt.ylabel('loss')
            plt.show()

        print('Epoch [%d] : loss [%f]' % (epoch + 1, sum_loss / num_batches))
        print('train accuracy = %f%%' % (100 * acc / len(trainData)))

    if save:
        torch.save(net.state_dict(), 'MNIST_Model.pth')
def gpu_train():
    """Training loop identical to the CPU variant, but running on the GPU
    when one is available; tensors are moved to the chosen device on use."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)
    # Alternatively: torch.device("cuda") or torch.device("cpu")

    net = LeNet()
    net.to(device)  # place the network on the chosen device
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    for epoch in range(5):
        running_loss = 0.0
        time_start = time.perf_counter()

        for step, (inputs, labels) in enumerate(train_loader, start=0):
            optimizer.zero_grad()
            outputs = net(inputs.to(device))                   # inputs on device
            loss = loss_function(outputs, labels.to(device))   # labels on device
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if step % 1000 == 999:
                with torch.no_grad():
                    # score the held-out batch on the same device
                    outputs = net(test_image.to(device))
                    predict_y = torch.max(outputs, dim=1)[1]
                    hits = (predict_y == test_label.to(device)).sum().item()
                    accuracy = hits / test_label.size(0)
                    print('[%d, %5d] train_loss: %.3f test_accuracy: %.3f' %
                          (epoch + 1, step + 1, running_loss / 1000, accuracy))
                    print('%f s' % (time.perf_counter() - time_start))
                    running_loss = 0.0

    print('Finished Training')

    save_path = './Lenet.pth'
    torch.save(net.state_dict(), save_path)
def main():
    """Train LeNet on a locally stored MNIST dataset and save the weights."""

    class Args:
        """Plain attribute bag standing in for parsed command-line flags."""
        pass

    args = Args()
    args.batch_size = 64
    args.test_batch_size = 2
    args.epochs = 10
    args.lr = 0.0001
    args.momentum = 0.5
    args.no_cuda = False
    args.seed = 1
    args.log_interval = 100
    args.save_model = True

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    # torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))])

    dataset_dir = Path(os.environ['HOME']) / "datasets/mnist"
    train_loader = DataLoader(
        MnistDataset(root_dir=dataset_dir / "train", transform=transform),
        batch_size=args.batch_size, shuffle=True, num_workers=4)
    test_loader = DataLoader(
        MnistDataset(root_dir=dataset_dir / "test", transform=transform),
        batch_size=args.test_batch_size, shuffle=True, num_workers=4)

    model = LeNet().to(device)
    # model = nn.DataParallel(model)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Train then evaluate once per epoch.
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(args, model, device, test_loader)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pth")
def usually_train():
    """CPU training loop for LeNet; reports loss/accuracy every 1000 steps
    against the full held-out batch, then saves weights to ./Lenet.pth."""
    net = LeNet()                                        # network to train
    loss_function = nn.CrossEntropyLoss()                # cross-entropy loss
    optimizer = optim.Adam(net.parameters(), lr=0.001)   # optimizer + LR

    for epoch in range(5):  # one epoch = one full pass over the training set
        running_loss = 0.0
        time_start = time.perf_counter()

        for step, (inputs, labels) in enumerate(train_loader, start=0):
            optimizer.zero_grad()  # clear accumulated gradients

            # forward + backward + optimize
            outputs = net(inputs)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if step % 1000 == 999:  # report every 1000 mini-batches
                with torch.no_grad():  # no gradients needed while validating
                    outputs = net(test_image)  # whole test batch, [N, 10]
                    predict_y = torch.max(outputs, dim=1)[1]  # argmax class
                    accuracy = (predict_y == test_label).sum().item() / test_label.size(0)
                    print('[%d, %5d] train_loss: %.3f test_accuracy: %.3f' %
                          (epoch + 1, step + 1, running_loss / 1000, accuracy))
                    print('%f s' % (time.perf_counter() - time_start))
                    running_loss = 0.0

    print('Finished Training')

    # persist the learned parameters
    save_path = './Lenet.pth'
    torch.save(net.state_dict(), save_path)
#lr=0.01 criterion=nn.CrossEntropyLoss() optimizer=optim.SGD(model.parameters(),lr=args.lr,momentum=0.9,weight_decay=5e-4) epoch=5 for epoch_id in range(epoch): train_loss=0 correct=0 total=0 for batch_idx, (inputs,targets) in enumerate(data_train_loader): optimizer.zero_grad() outputs=model(inputs) loss=criterion(outputs,targets) loss.backward() optimizer.step() train_loss+=loss.item() _,predicted=outputs.max(1) total+=targets.size(0) correct+=predicted.eq(targets).sum().item() print(epoch_id, batch_idx,len(data_train_loader),'Loss:%.3f|Acc:%.3f%%(%d/%d)'%(train_loss/(batch_idx+1),100.*correct/total,correct,total)) save_info={ "iter_num":epoch, #迭代步数 "optimizer":optimizer.state_dict(), #优化器的状态字典 "model":model.state_dict(), #模型的状态字典 } save_path="./model_save/model.pth" #保存信息 torch.save(save_info,save_path)
def train():
    """Train a 2-class LeNet on the RMB dataset (configured via the
    module-level ``opt``), validating every ``opt.val_interval`` epochs and
    saving the final state dict to ``opt.path_state_dict``.

    Fix vs. original: the validation report printed the *training* accuracy
    (``correct / total``); it now prints ``correct_val / total_val``.
    """
    device = torch.device("cuda:0" if opt.cuda else "cpu")
    utils.set_seed()

    # ============================ step 1/5: data ============================
    norm_mean = [0.485, 0.456, 0.406]
    norm_std = [0.229, 0.224, 0.225]

    train_transform = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ])
    valid_transform = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ])

    # build the custom datasets
    train_data = RMBDataset(data_dir=opt.train_dir, transform=train_transform)
    valid_data = RMBDataset(data_dir=opt.valid_dir, transform=valid_transform)

    # build the DataLoaders
    train_loader = DataLoader(dataset=train_data, batch_size=opt.batch_size, shuffle=True)
    valid_loader = DataLoader(dataset=valid_data, batch_size=opt.batch_size)

    # ============================ step 2/5: model ===========================
    net = LeNet(classes=2)
    net.to(device)
    # net.initialize_weights()

    # ======================== step 3/5: loss function ======================
    criterion = nn.CrossEntropyLoss()

    # ========================== step 4/5: optimizer ========================
    optimizer = optim.SGD(net.parameters(), lr=opt.lr, momentum=0.9)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    # =========================== step 5/5: training ========================
    train_curve = list()
    valid_curve = list()

    for epoch in range(opt.epochs):
        loss_mean = 0.
        correct = 0.
        total = 0.

        net.train()
        for i, data in enumerate(train_loader):
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = net(inputs)
            optimizer.zero_grad()
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # classification statistics
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).squeeze().sum().to("cpu").numpy()

            # training log
            loss_mean += loss.item()
            train_curve.append(loss.item())
            if (i + 1) % opt.log_interval == 0:
                loss_mean = loss_mean / opt.log_interval
                print(
                    "Training:Epoch[{:0>3}/{:0>3}] Iteration[{:0>3}/{:0>3}] Loss: {:.4f} Acc:{:.2%}"
                    .format(epoch, opt.epochs, i + 1, len(train_loader),
                            loss_mean, correct / total))
                loss_mean = 0.

        scheduler.step()  # update the learning rate

        if (epoch + 1) % opt.val_interval == 0:
            correct_val = 0.
            total_val = 0.
            loss_val = 0.
            net.eval()
            with torch.no_grad():
                for j, data in enumerate(valid_loader):
                    inputs, labels = data
                    inputs = inputs.to(device)
                    labels = labels.to(device)
                    outputs = net(inputs)
                    loss = criterion(outputs, labels)

                    _, predicted = torch.max(outputs.data, 1)
                    total_val += labels.size(0)
                    correct_val += (predicted == labels).squeeze().sum().to("cpu").numpy()
                    loss_val += loss.item()

                # NOTE: loss_val is the *summed* (not mean) validation loss,
                # matching the original curve semantics.
                valid_curve.append(loss_val)
                # BUGFIX: report validation accuracy, not training accuracy.
                print(
                    "Valid:\t Epoch[{:0>3}/{:0>3}] Iteration[{:0>3}/{:0>3}] Loss: {:.4f} Acc:{:.2%}"
                    .format(epoch, opt.epochs, j + 1, len(valid_loader),
                            loss_val, correct_val / total_val))

    utils.loss_picture(train_curve, train_loader, valid_curve, opt.val_interval)

    # save the trained parameters
    net_state_dict = net.state_dict()
    torch.save(net_state_dict, opt.path_state_dict)
    print("模型保存成功")
# Fragment: tail of an optimizer construction (the matching `if` and call
# opening are outside this view), Google-Drive checkpoint bootstrap, and the
# start of an early-stopping loop. `model`, `args`, `optim`, `np`, loaders,
# train() and test() are defined elsewhere.
                    momentum=args.momentum)
else:
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum)

best_valid_loss = np.inf
iteration = 0  # consecutive epochs without improvement
epoch = 1

# Bootstrap: create an initial checkpoint on Drive if none exists yet.
if not os.path.isfile(
        '/content/drive/My Drive/PyTorch_Classifier/classifier.pt'):
    #save model
    model_save_name = 'classifier.pt'
    path = F"/content/drive/My Drive/PyTorch_Classifier/{model_save_name}"
    torch.save(model.state_dict(), path)

#load model
# NOTE(review): the load appears to run unconditionally (also right after the
# fresh save above) — confirm against the original file's indentation.
model_save_name = 'classifier.pt'
path = F"/content/drive/My Drive/PyTorch_Classifier/{model_save_name}"
model.load_state_dict(torch.load(path))

# training with early stopping
while (epoch < args.epochs + 1) and (iteration < args.patience):
    train(train_loader, model, optimizer, epoch, args.cuda, args.log_interval)
    valid_loss = test(valid_loader, model, args.cuda)
    if valid_loss > best_valid_loss:
        iteration += 1
        print('Loss was not improved, iteration {0}'.format(str(iteration)))
    else:
        print('Saving model...')
def train():
    """Train LeNet on the cat/dog dataset for 300 epochs with tqdm progress,
    appending averaged metrics to output/log.txt and checkpointing the
    latest weights each epoch; validates every epoch under torch.no_grad().

    Fixes vs. original: log files were opened with bare open() and never
    closed (a handle leak over 300 epochs) — now managed with ``with``; the
    validation progress-bar label 'teat' corrected to 'test'.
    """
    save_flag = True
    trainloader = get_dataset(r'H:/DataSet_All/猫狗识别/gpu/train', batch_size=64, imageindex=0)
    validationloader = get_dataset(r'H:/DataSet_All/猫狗识别/gpu/test', batch_size=64, imageindex=0)
    save_dir = 'output/'

    my_model = LeNet()
    my_model = model_init(my_model)
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = optim.Adam(my_model.parameters(), lr=0.001)
    epochs = 300

    for epoch in range(epochs):
        loss_list = []
        acc_list = []
        set_learning_rate(optimizer, epoch)
        learning_rate = optimizer.param_groups[0]['lr']

        tq = tqdm.tqdm(trainloader, desc='train')
        tq.set_description('train Epoch{} lr{}'.format(epoch, learning_rate))
        for images, labels in tq:
            images = images.to(device)
            labels = labels.to(device)

            outputs = my_model(images)
            loss = criterion(outputs, labels)
            my_model.zero_grad()
            loss.backward()
            optimizer.step()

            # running averages over the epoch so far
            loss_list.append(loss.item())
            loss_ave = sum(loss_list) / len(loss_list)
            _, predicted = torch.max(outputs.data, 1)
            accuracy = (predicted == labels).sum().float() / labels.size(0)
            acc_list.append(accuracy)
            acc_ave = sum(acc_list) / len(acc_list)
            tq.set_postfix(
                loss="%.4f accuracy:%.4f loss_ave:%.5f acc_ave:%.5f " %
                (loss.item(), accuracy, loss_ave, acc_ave))

        if save_flag:
            log = "\ntrain \tEpoch {}/{} \t Learning rate: {:.5f} \t Train loss_ave: {:.5f} \t acc_ave: {:.5f} \t " \
                .format(epoch, epochs, learning_rate, loss_ave, acc_ave)
            # print(log)
            # FIX: context manager closes the log file every epoch
            with open(save_dir + '/log.txt', 'a') as log_file:
                log_file.write(log + '\n')
            torch.save(my_model.state_dict(), save_dir + '/model_lastest.pt')

        if epoch % 1 == 0:  # validate every epoch
            loss_list = []
            acc_list = []
            with torch.no_grad():
                tq = tqdm.tqdm(validationloader, desc='test')  # FIX: was 'teat'
                for images, labels in tq:
                    images = images.to(device)
                    labels = labels.to(device)

                    outputs = my_model(images)
                    validation_loss = criterion(outputs, labels)
                    loss_list.append(validation_loss.item())
                    _, predicted = torch.max(outputs.data, 1)
                    accuracy = (predicted == labels).sum().float() / labels.size(0)
                    acc_list.append(accuracy)
                    acc_ave = sum(acc_list) / len(acc_list)
                    loss_ave = sum(loss_list) / len(loss_list)
                    tq.set_postfix(
                        test_loss="%.4f acc:%.4f loss_ave:%.5f acc_ave:%.5f " %
                        (validation_loss, accuracy, loss_ave, acc_ave))

            log = "\ntest \tEpoch {}/{} \t Learning rate: {:.5f} \t Train loss_ave: {:.5f} \t acc_ave: {:.5f} \t " \
                .format(epoch, epochs, learning_rate, loss_ave, acc_ave)
            # print(log)
            with open(save_dir + '/log.txt', 'a') as log_file:  # FIX: closed handle
                log_file.write(log + '\n')
#### for batch_idx, (inputs, targets) in enumerate(data_train_loader): output = model(inputs) ##识别10个手写数字(0~9),因此output输出10个概率值 loss = loss_define(output, targets) ##前向传播计算出损失 loss.backward() ##对损失进行反向传播 optimizer.step() ##根据学习率等超参数进行梯度更新(本文使用Adam) ### 未完待续,8月4日 train_loss += loss.item() # 该步的训练损失 _, predict = output.max(1) # predict输出的将是output中的最大的一个概率 total += targets.size(0) # 参与训练的总样本数 correct += predict.eq(targets).sum().item() #预测正确的数量(使output与target对比) print( batch_idx, len(data_train_loader), 'Loss: %.3f | Acc: %.3f%%(%d,%d)' % (train_loss / (batch_idx + 1), 100. * correct / total, correct, total)) loss_plot.append(train_loss / (batch_idx + 1)) plt.plot(range(epoch_num), loss_plot, '--') plt.show() ## 模型保存 save_info = { "epoch_num": epoch_num, "optimizer": optimizer.state_dict(), "model": model.state_dict(), } # save_dict = model.state_dict() torch.save(save_info, 'D:/数据结构学习/LeNet_master/model_save/model.pth')
for i, data in enumerate(train_loader): inputs, labels = data # 若CUDA可用 可将cpu改成CUDA inputs, labels = Variable(inputs).cpu(), Variable(labels).cpu() optimizer.zero_grad() # 梯度归零 outputs = net(inputs) # 将数据传入网络进行前向运算 loss = criterion(outputs, labels) # 得到损失函数 loss.backward() # 反向传播 optimizer.step() # 优化更新 sum_loss += loss.item() if i % 100 == 99: print('epoch:%d, step:%d, loss:%.03f' % (epoch + 1, i + 1, sum_loss / 100)) sum_loss = 0.0 print("train finished, model saved.") torch.save(net.state_dict(), './model_save/LeNet.pth') print("-----------------start testing-----------------") net.eval() # 将模型变换为测试模式 correct = 0 total = 0 for data_test in test_loader: images, labels = data_test images, labels = Variable(images).cpu(), Variable(labels).cpu() output_test = net(images) _, predicted = torch.max(output_test, 1) total += labels.size(0) correct += (predicted == labels).sum() print("Test finished. Test acc = {0}".format(correct.item() / len(test_data_set)))
# Script fragment: model selection, SGD training loop, checkpointing, and
# validation for an argparse-driven MNIST run (`args`, `train_loader`,
# `validate`, `Net`, `Variable`, `F` come from enclosing code).
#
# Fix vs. original: `loss.data[0]` was removed in PyTorch >= 0.4 (0-dim
# tensors cannot be indexed); `loss.item()` is the supported replacement.
if args.lenet:
    model = LeNet()
else:
    model = Net()
if args.cuda:
    model.cuda()

optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

model.train()
for epoch in range(args.epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))  # FIX: was loss.data[0]

# Save under a model-specific name, then validate.
if args.lenet:
    torch.save(model.state_dict(), "mnist_lenet.pt")
else:
    torch.save(model.state_dict(), "mnist_cnn.pt")
validate(model)
# Fragment: epoch loop plus a final accuracy pass. `model`, `criterion`,
# `optimizer`, `device`, `num_epochs`, `train_loader`, `test_loader` are
# supplied by surrounding code.
num_batches = len(train_loader)
for epoch in range(num_epochs):
    for idx, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if ((idx + 1) % 100 == 0):  # progress report every 100 steps
            print("epoch is {}/{} Step is: {}/{} loss is: {}".format(
                epoch, num_epochs, idx, num_batches, loss.item()))

# NOTE(review): model.train() is never re-enabled after eval() here — fine
# if, as it appears, testing is the final step of the script.
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for idx, (inputs, labels) in enumerate(test_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        preds = model(inputs)
        values, indices = torch.max(preds, 1)  # indices = predicted class ids
        total += labels.shape[0]
        correct += (labels == indices).sum().item()
    print("Accuracy of the network is: {}%".format(100 * correct / total))

torch.save(model.state_dict(), 'model.pth')
# Fragment: body of one training epoch for an AlexNet run (the enclosing
# epoch loop is not visible). `net`, `optimizer`, `loss_function`,
# `train_loader`, `val_image`, `val_label`, `epoch` come from enclosing code.
running_loss = 0.0
for step, data in enumerate(train_loader, start=0):
    # get the inputs; data is a list of [inputs, labels]
    inputs, labels = data

    # zero the parameter gradients
    optimizer.zero_grad()
    # forward + backward + optimize
    outputs = net(inputs)
    loss = loss_function(outputs, labels)
    loss.backward()
    optimizer.step()

    # print statistics
    running_loss += loss.item()
    if step % 500 == 499:  # print every 500 mini-batches
        with torch.no_grad():
            outputs = net(val_image)  # [batch, 10]
            predict_y = torch.max(outputs, dim=1)[1]
            accuracy = (predict_y == val_label).sum().item() / val_label.size(0)
            print('[%d, %5d] train_loss: %.3f test_accuracy: %.3f' %
                  (epoch + 1, step + 1, running_loss / 500, accuracy))
            running_loss = 0.0

print('Finished Training')

save_path = './Alexnet.pth'
torch.save(net.state_dict(), save_path)
# Fragment: trailing arguments of an optimizer construction (its opening is
# outside this view) plus an early-stopping training loop. `model`, `args`,
# `np`, the loaders, train() and test() are defined elsewhere.
                    lr=args.lr,
                    momentum=args.momentum)

best_valid_loss = np.inf
iteration = 0  # consecutive epochs without improvement
epoch = 1

# training with early stopping: stop after args.patience bad epochs
while (epoch < args.epochs + 1) and (iteration < args.patience):
    train(train_loader, model, optimizer, epoch, args.cuda, args.log_interval)
    valid_loss = test(valid_loader, model, args.cuda)
    if valid_loss > best_valid_loss:
        iteration += 1
        print('Loss was not improved, iteration {0}'.format(str(iteration)))
    else:
        print('Saving model...')
        iteration = 0
        best_valid_loss = valid_loss
        # NOTE(review): `state` holds the live module object but is never
        # saved — the torch.save below writes model.state_dict() instead.
        # Confirm which checkpoint format downstream loaders expect.
        state = {
            'net': model.module if args.cuda else model,
            'acc': valid_loss,
            'epoch': epoch,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        torch.save(model.state_dict(), './checkpoint/ckpt.t7')
    epoch += 1

# test model
test(test_loader, model, args.cuda)
# trainint with early stopping while (epoch < args.epochs + 1) and (iteration < args.patience): train(train_loader, model, optimizer, epoch, args.cuda, args.log_interval) print('Epoch {0}'.format(str(epoch))) valid_loss, acc = test(valid_loader, model, args.cuda, is_valid=True) acc_save = 1-acc if acc_save > best_valid_loss: iteration += 1 print('Loss was not improved, iteration {0}'.format(str(iteration))) else: print('Saving model...') iteration = 0 best_valid_loss = acc_save state = { 'net': model.module.state_dict() if args.cuda else model.state_dict(), 'acc': valid_loss, 'epoch': epoch, } if not os.path.isdir('checkpoint'): os.mkdir('checkpoint') torch.save(state, './checkpoint/{}.t7'.format(args.arc)) epoch += 1 # test model checkpoint = torch.load('./checkpoint/{}.t7'.format(args.arc),map_location = lambda storage, loc: storage) if args.arc == 'LeNet': test_model = LeNet(num_classes=num_classes) elif args.arc.startswith('VGG'): test_model = VGG(args.arc, num_classes=num_classes)
def main():
    """Entry point for the ADMM pruning experiment on MNIST (LeNet) or
    CIFAR-10 (AlexNet): parse flags, build loaders, then either evaluate a
    saved model (--test/--stat) or run pretrain -> train -> prune -> retrain.

    Fix vs. original: ``--percent`` used ``type=list``, which makes argparse
    split a command-line value into individual characters (e.g. "0.8" ->
    ['0', '.', '8']); it is now a proper multi-value float option with the
    same default.
    """
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--dataset', type=str, default="mnist",
                        choices=["mnist", "cifar10"], metavar='D',
                        help='training dataset (mnist or cifar10)')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    # FIX: was type=list, which chops a CLI string into characters.
    parser.add_argument('--percent', nargs='+', type=float,
                        default=[0.8, 0.92, 0.991, 0.93], metavar='P',
                        help='pruning percentage (default: 0.8)')
    parser.add_argument('--alpha', type=float, default=5e-4, metavar='L',
                        help='l2 norm weight (default: 5e-4)')
    parser.add_argument('--rho', type=float, default=1e-2, metavar='R',
                        help='cardinality weight (default: 1e-2)')
    parser.add_argument(
        '--l1', default=False, action='store_true',
        help='prune weights with l1 regularization instead of cardinality')
    parser.add_argument('--l2', default=False, action='store_true',
                        help='apply l2 regularization')
    parser.add_argument('--num_pre_epochs', type=int, default=3, metavar='P',
                        help='number of epochs to pretrain (default: 3)')
    parser.add_argument('--num_epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--num_re_epochs', type=int, default=3, metavar='R',
                        help='number of epochs to retrain (default: 3)')
    parser.add_argument('--lr', type=float, default=1e-3, metavar='LR',
                        help='learning rate (default: 1e-2)')
    parser.add_argument('--adam_epsilon', type=float, default=1e-8, metavar='E',
                        help='adam epsilon (default: 1e-8)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    parser.add_argument('--structured', action='store_true', default=False,
                        help='Enabling Structured Pruning')
    parser.add_argument('--test', action='store_true', default=False,
                        help='For Testing the current Model')
    parser.add_argument(
        '--stat', action='store_true', default=False,
        help='For showing the statistic result of the current Model')
    parser.add_argument('--n1', type=int, default=2, metavar='N',
                        help='ReRAM OU size (row number) (default: 2)')
    parser.add_argument('--n2', type=int, default=2, metavar='N',
                        help='ReRAM OU size (column number) (default: 2)')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # ----- Data loaders ---------------------------------------------------
    if args.dataset == "mnist":
        train_loader = torch.utils.data.DataLoader(
            datasets.MNIST('data', train=True, download=True,
                           transform=transforms.Compose([
                               transforms.ToTensor(),
                               transforms.Normalize((0.1307, ), (0.3081, ))
                           ])),
            batch_size=args.batch_size, shuffle=True, **kwargs)
        test_loader = torch.utils.data.DataLoader(
            datasets.MNIST('data', train=False,
                           transform=transforms.Compose([
                               transforms.ToTensor(),
                               transforms.Normalize((0.1307, ), (0.3081, ))
                           ])),
            batch_size=args.test_batch_size, shuffle=True, **kwargs)
    else:
        # CIFAR-10: deeper schedule and per-layer pruning percentages.
        args.percent = [0.8, 0.92, 0.93, 0.94, 0.95, 0.99, 0.99, 0.93]
        args.num_pre_epochs = 5
        args.num_epochs = 20
        args.num_re_epochs = 5
        train_loader = torch.utils.data.DataLoader(
            datasets.CIFAR10('data', train=True, download=True,
                             transform=transforms.Compose([
                                 transforms.ToTensor(),
                                 transforms.Normalize(
                                     (0.49139968, 0.48215827, 0.44653124),
                                     (0.24703233, 0.24348505, 0.26158768))
                             ])),
            shuffle=True, batch_size=args.batch_size, **kwargs)
        test_loader = torch.utils.data.DataLoader(
            datasets.CIFAR10('data', train=False, download=True,
                             transform=transforms.Compose([
                                 transforms.ToTensor(),
                                 transforms.Normalize(
                                     (0.49139968, 0.48215827, 0.44653124),
                                     (0.24703233, 0.24348505, 0.26158768))
                             ])),
            shuffle=True, batch_size=args.test_batch_size, **kwargs)

    # ----- Model / optimizer ----------------------------------------------
    model = LeNet().to(device) if args.dataset == "mnist" else AlexNet().to(
        device)
    optimizer = PruneAdam(model.named_parameters(), lr=args.lr,
                          eps=args.adam_epsilon)

    structured_tag = "_structured{}x{}".format(
        args.n1, args.n2) if args.structured else ""
    model_file = "mnist_cnn{}.pt".format(structured_tag) if args.dataset == "mnist" \
        else 'cifar10_cnn{}.pt'.format(structured_tag)

    if args.stat or args.test:
        # Evaluation-only path: load a previously saved model.
        print("=> loading model '{}'".format(model_file))
        if os.path.isfile(model_file):
            model.load_state_dict(torch.load(model_file))
            print("=> loaded model '{}'".format(model_file))
            if args.test:
                test(args, model, device, test_loader)
            if args.stat:
                show_statistic_result(args, model)
        else:
            print("=> loading model failed '{}'".format(model_file))
    else:
        # Training path: resume from checkpoint if available, else pretrain.
        checkpoint_file = 'checkpoint{}.pth.tar'.format(
            "_mnist" if args.dataset == "mnist" else "_cifar10")
        if not os.path.isfile(checkpoint_file):
            pre_train(args, model, device, train_loader, test_loader, optimizer)
            torch.save(
                {
                    'epoch': args.num_pre_epochs,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict()
                }, checkpoint_file)
        else:
            print("=> loading checkpoint '{}'".format(checkpoint_file))
            checkpoint = torch.load(checkpoint_file)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}'".format(checkpoint_file))

        train(args, model, device, train_loader, test_loader, optimizer)
        # Prune (L1 or cardinality), report, evaluate, then fine-tune.
        mask = apply_l1_prune(model, device, args) if args.l1 else apply_prune(
            model, device, args)
        print_prune(model)
        test(args, model, device, test_loader)
        retrain(args, model, mask, device, train_loader, test_loader, optimizer)

        if args.save_model:
            torch.save(model.state_dict(), model_file)
def main(args):
    """Dispatch CIFAR-10 train / eval / inspect / predict runs based on *args*.

    Fix vs. original: the unknown-model branch executed ``raise "..."`` with a
    plain string, which itself raises TypeError in Python 3; it now raises a
    proper ValueError carrying the same message.
    """
    check_path(args)
    # the 10 CIFAR-10 classes
    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
               'ship', 'truck')

    # dataset
    data_builder = DataBuilder(args)
    dataSet = DataSet(data_builder.train_builder(), data_builder.test_builder(),
                      classes)

    # model selection
    if args.lenet:
        net = LeNet()
        model_name = args.name_le
    elif args.vgg:
        net = Vgg16_Net()
        model_name = args.name_vgg
    else:
        # FIX: strings are not raisable in Python 3
        raise ValueError("Sorry, you can only select LeNet or VGG.")

    # cross-entropy loss
    criterion = nn.CrossEntropyLoss()
    # SGD optimizer
    optimizer = optim.SGD(net.parameters(), lr=args.learning_rate,
                          momentum=args.sgd_momentum,
                          weight_decay=args.weight_decay)
    # where parameters are stored, default "./model/state_dict"
    model_path = os.path.join(args.model_path, model_name)
    # run on GPU when available and not explicitly disabled
    device = t.device("cuda:0" if (
        t.cuda.is_available() and not args.no_cuda) else "cpu")

    # training
    if args.do_train:
        print("Training...")
        trainer = Trainer(net, criterion, optimizer, dataSet.train_loader, args)
        trainer.train(epochs=args.epoch)
        # save parameters only
        t.save(net.state_dict(), model_path)

    # evaluation
    if args.do_eval:
        if not os.path.exists(model_path):
            print(
                "Sorry, there's no saved model yet, you need to train first.")
            return
        print("Testing...")
        device = t.device("cuda:0" if t.cuda.is_available() else "cpu")
        net.load_state_dict(t.load(model_path, map_location=device))
        # net.eval()
        tester = Tester(dataSet.test_loader, net, args)
        tester.test()

    if args.show_model:
        if not os.path.exists(model_path):
            print(
                "Sorry, there's no saved model yet, you need to train first.")
            return
        show_model(args)

    if args.do_predict:
        net.load_state_dict(t.load(model_path, map_location=device))
        predictor = Predictor(net, classes)
        # img_path = 'test'
        # img_name = [os.path.join(img_path, x) for x in os.listdir(img_path)]
        # for img in img_name:
        #     predictor.predict(img)
        img_path = 'test/cat0.jpg'
        predictor.predict(img_path)
# Fragment: tail of a training step with TensorBoard logging, per-epoch
# checkpointing, and the start of a validation pass. `model`, `optimizer`,
# `writer`, `loss`, `acc_train`, `epoch`, `batch_idx`, `date`, `device`,
# and the loaders come from enclosing code.
ave_loss = ave_loss * 0.9 + loss.item() * 0.1  # exponential moving average of loss
loss.backward()
optimizer.step()

# log scalars against the global step index
writer.add_scalar('train/loss', loss, epoch * len(train_loader) + batch_idx)
writer.add_scalar('train/acc', acc_train,
                  epoch * len(train_loader) + batch_idx)

# report every 10 batches and on the last batch of the epoch
if (batch_idx + 1) % 10 == 0 or (batch_idx + 1) == len(train_loader):
    print(
        '==>>> epoch: {}, batch index: {}, train loss: {:.6f}, acc: {:.3f}'
        .format(epoch, batch_idx + 1, ave_loss, acc_train))

# per-epoch checkpoint with both model and optimizer state
checkpoint = {
    "state_dict": model.state_dict(),
    "optimizer": optimizer.state_dict(),
}
os.makedirs("./weights/{}".format(date), exist_ok=True)
torch.save(checkpoint, "./weights/{}/checkpoint_{}.pt".format(date, epoch))

# reset counters for the validation pass
correct_cnt, ave_loss = 0, 0
total_cnt = 0
acc_val = []
loss_val = []
with torch.no_grad():
    model.eval()
    for batch_idx, (x, target) in enumerate(test_loader):
        x = x.to(device)
# Fragment: end-of-epoch metric bookkeeping for a top-1/top-5 run. Counters,
# logs, `val_label`, `top5_val_preds`, `model`, `epochs` come from the
# enclosing loop.
for i in range(train_len):
    if val_label[i] in top5_val_preds[i]:
        top5_valid_correct += 1

# NOTE(review): these use integer floor division (`//`, marked "#altered"
# by the author), so losses and accuracies are truncated to whole numbers —
# confirm that is intended before relying on these logs.
epoch_loss = train_loss // total_batch_train
epoch_acc = 100 * train_correct // train_total  #altered
top5_epoch_acc = 100 * top5_train_correct // train_total
top5_val_epoch_acc = 100 * top5_valid_correct // valid_total
train_loss_log.append(epoch_loss)
train_accuracy_log.append(epoch_acc)
top5_train_accuracy_log.append(top5_epoch_acc)

val_epoch_loss = validation_loss // total_batch_val
val_epoch_acc = 100 * validation_correct // valid_total  #altered
valid_loss_log.append(val_epoch_loss)
valid_accuracy_log.append(val_epoch_acc)
top5_valid_accuracy_log.append(top5_val_epoch_acc)

print("===================================================")
print(f'[epoch: {epoch + 1}]')
print(
    f'training loss: {epoch_loss:.4f}, training accuracy: {epoch_acc:.2f} %(top1) {top5_epoch_acc:.2f}%(top5)'
)
print(
    f'validation loss: {val_epoch_loss:.4f}, validation accuracy: {val_epoch_acc:.2f}%(top1) {top5_val_epoch_acc:.2f}%(top5)'
)

# Save model
torch.save(model.state_dict(), f'./weight/mnist_{epochs}.pth')
# Fragment: trailing keyword arguments of a certify(...) call (its opening
# is outside this view), checkpoint saving, and the start of the test-mode
# branch. `base_model`, `ckptdir`, `matdir`, `epoch`, `args`, `model`,
# `device`, `testset`, `transform_test`, `num_classes`, `t1` defined elsewhere.
        mode='hard',
        start_img=args.start_img,
        num_img=args.num_img,
        sigma=args.sigma,
        beta=args.beta,
        # write a per-epoch .mat result only when a mat directory was given
        matfile=(None if matdir is None else os.path.join(
            matdir, '{}.mat'.format(epoch))))

t2 = time.time()
print('Elapsed time: {}'.format(t2 - t1))

if ckptdir is not None:
    # Save checkpoint
    print('==> Saving {}.pth..'.format(epoch))
    try:
        state = {
            'net': base_model.state_dict(),
            'epoch': epoch,
        }
        torch.save(state, '{}/{}.pth'.format(ckptdir, epoch))
    except OSError:
        # keep going even if the filesystem write fails
        print('OSError while saving {}.pth'.format(epoch))
        print('Ignoring...')
else:
    # Test routine
    certify(model, device, testset, transform_test, num_classes,
            mode='both',
def main():
    """Train LeNet on the 5-class flower dataset and keep the best checkpoint.

    Expects the dataset under ./data_set/flower_data/{train,val} in
    torchvision ImageFolder layout.  Writes the class-index mapping to
    class_indices.json and the best (by validation accuracy) weights to
    weights/lenet.pth.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    batch_size = 16
    epochs = 200

    # ImageNet-style augmentation for training, deterministic resize/crop for eval.
    data_transform = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ]),
        "val": transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ])
    }

    data_root = os.path.abspath(os.path.join(os.getcwd(), "."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    # FIX: raise instead of assert -- asserts are stripped under `python -O`.
    if not os.path.exists(image_path):
        raise FileNotFoundError("{} path does not exist.".format(image_path))

    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # e.g. {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    cla_dict = {val: key for key, val in flower_list.items()}
    # Persist the index -> class-name mapping for use at inference time.
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json.dumps(cla_dict, indent=4))

    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(
        train_num, val_num))

    # create model
    net = LeNet(num_classes=5)
    # NOTE(review): dead commented-out pretrained-weight loading / backbone
    # freezing code was removed here.  Reference checkpoints, if needed:
    #   https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth
    #   https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth
    net.to(device)

    # define loss function (expects raw logits from the network)
    loss_function = nn.CrossEntropyLoss()

    # construct an optimizer over trainable parameters only
    params = [p for p in net.parameters() if p.requires_grad]
    optimizer = optim.Adam(params, lr=0.0001)

    best_acc = 0.0
    save_path = 'weights/lenet.pth'
    # FIX: make sure the checkpoint directory exists before torch.save,
    # which does not create intermediate directories itself.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            logits = net(images.to(device))
            loss = loss_function(logits, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            # FIX: format the Python scalar, not the 0-dim tensor.
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(
                epoch + 1, epochs, loss.item())

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                # max over dim 1 returns (values, indices); [1] keeps the
                # predicted class indices.
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()
                val_bar.desc = "valid epoch[{}/{}]".format(epoch + 1, epochs)

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # keep only the best-performing checkpoint
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')
def main():
    """Train LeNet on CIFAR-10 for 5 epochs and save the final weights.

    Expects the CIFAR-10 data under ./data (set download=True on first run).
    Prints the running training loss and held-out accuracy every 500 steps
    and writes the trained state_dict to ./Lenet.pth.
    """
    # Compose bundles the preprocessing steps into one callable:
    # ToTensor converts H*W*C uint8 [0,255] images to C*H*W float [0,1]
    # tensors; Normalize with mean/std 0.5 then maps each channel to ~[-1, 1].
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    # 50000 training images; set download=True on first use to fetch the data.
    train_set = torchvision.datasets.CIFAR10(root='./data',
                                             train=True,
                                             download=False,
                                             transform=transform)
    # shuffle=True draws random batches; num_workers must stay 0 on Windows.
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=36,
                                               shuffle=True,
                                               num_workers=0)

    # 10000 test images, used here as one fixed 5000-sample validation batch.
    val_set = torchvision.datasets.CIFAR10(root='./data',
                                           train=False,
                                           download=False,
                                           transform=transform)
    val_loader = torch.utils.data.DataLoader(val_set,
                                             batch_size=5000,
                                             shuffle=False,
                                             num_workers=0)

    val_data_iter = iter(val_loader)
    # FIX: use the builtin next() -- the Python-2-style .next() method has
    # been removed from modern torch DataLoader iterators.
    val_image, val_label = next(val_data_iter)

    # CIFAR-10 label names, index-aligned with the integer targets.
    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
               'ship', 'truck')
    # To visualize samples: un-normalize (img / 2 + 0.5), convert to numpy and
    # transpose C*H*W -> H*W*C before plt.imshow; see the official tutorial:
    # https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html

    net = LeNet()
    # CrossEntropyLoss combines LogSoftmax and NLLLoss, so the network must
    # output raw logits (no softmax layer).
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    for epoch in range(5):  # loop over the dataset multiple times
        running_loss = 0.0  # accumulated training loss for the report window
        for step, data in enumerate(train_loader, start=0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics every 500 mini-batches
            running_loss += loss.item()
            if step % 500 == 499:
                # no_grad: evaluation needs no gradient bookkeeping, which
                # saves memory/compute on the 5000-image validation batch.
                with torch.no_grad():
                    outputs = net(val_image)  # [batch, 10]
                    # max over dim 1 returns (values, indices); [1] keeps the
                    # predicted class indices.
                    predict_y = torch.max(outputs, dim=1)[1]
                    accuracy = torch.eq(predict_y, val_label).sum().item() \
                        / val_label.size(0)
                    print('[%d, %5d] train_loss: %.3f test_accuracy: %.3f' %
                          (epoch + 1, step + 1, running_loss / 500, accuracy))
                    running_loss = 0.0  # reset for the next 500-step window

    print('Finished Training')
    save_path = './Lenet.pth'
    torch.save(net.state_dict(), save_path)  # persist all learned parameters