import pickle as pkl

import torch
import torchvision
import torchvision.transforms as transforms


def train():
    # Standard CIFAR-10 augmentation; padding=4 is required, since a
    # RandomCrop of size 32 on a 32x32 image is otherwise a no-op.
    transform_train = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(size=32, padding=4),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=False,
                                            transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.batch_size,
                                              shuffle=True, num_workers=2)
    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=False,
                                           transform=transform_test)
    # The test set does not need shuffling.
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=args.batch_size,
                                             shuffle=False, num_workers=2)
    classes = ('plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck')

    model = VGG(vars(args))
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lrate,
                                momentum=0.9, weight_decay=5e-4)
    if args.use_cuda:
        model = model.cuda()

    # Evaluation-only mode: load a saved state dict and report accuracy.
    if args.eval:
        model.load_state_dict(torch.load(args.model_dir))
        model.eval()
        accuracy = model.evaluate(testloader)
        return

    total_size = len(trainloader)
    lrate = args.lrate
    best_score = 0.0
    scores = []
    for epoch in range(1, args.epochs + 1):
        model.train()
        for i, (image, label) in enumerate(trainloader):
            if args.use_cuda:
                image, label = image.cuda(), label.cuda()
            loss = model(image, label)
            # optimizer.zero_grad() already clears the model's gradients;
            # the extra model.zero_grad() was redundant and has been dropped.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if i % 100 == 0:
                print('Epoch = %d, step = %d / %d, loss = %.5f lrate = %.5f'
                      % (epoch, i, total_size, loss.item(), lrate))

        model.eval()
        accuracy = model.evaluate(testloader)
        scores.append(accuracy)
        with open(args.model_dir + "_scores.pkl", "wb") as f:
            pkl.dump(scores, f)
        if best_score < accuracy:
            best_score = accuracy
            print('saving %s ...' % args.model_dir)
            torch.save(model.state_dict(), args.model_dir)
        # Step-decay the learning rate every `decay_period` epochs.
        if epoch % args.decay_period == 0:
            lrate *= args.decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lrate
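# train() above reads its configuration from a module-level `args`. A minimal
# sketch of the argparse wiring it assumes follows; every flag name is taken
# from its use in train(), but the default values here are illustrative, not
# from the original script. VGG and model.evaluate() are defined elsewhere.
import argparse

parser = argparse.ArgumentParser(description='CIFAR-10 VGG training')
parser.add_argument('--batch_size', type=int, default=128)
parser.add_argument('--lrate', type=float, default=0.1)
parser.add_argument('--epochs', type=int, default=200)
parser.add_argument('--decay', type=float, default=0.5)
parser.add_argument('--decay_period', type=int, default=30)
parser.add_argument('--model_dir', type=str, default='./vgg.pt')
parser.add_argument('--use_cuda', action='store_true')
parser.add_argument('--eval', action='store_true')
args = parser.parse_args()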
    # (tail of prune_weights(); a sketch of the full function follows below)
    masks = masks.reshape(weightshape)
    weights = masks * weights
    return torch.from_numpy(weights).cuda(), masks, num, count, maskcount


# Prune weights.
# The pruned weight locations are saved in the addressbook and maskbook;
# these are used during training to keep the pruned weights at zero.
addressbook = []
maskbook = []
totwts = 0
totpru = 0
totmcount = 0
for k, v in net.state_dict().items():
    if "weight" in k:  # e.g. "conv2.weight"
        addressbook.append(k)
        print("pruning layer:", k)
        weights = v
        weights, masks, tot_wts, totpru_wts, tot_mcnt = prune_weights(weights)
        maskbook.append(masks)
        checkpoint['net'][k] = weights
        totwts += tot_wts
        totpru += totpru_wts
        totmcount += tot_mcnt

print("Total weights", totwts)
print("Total Pruned", totpru)
print("Total Mask Count", totmcount)
checkpoint['address'] = addressbook
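# Only the tail of prune_weights() appears above. A minimal sketch of what the
# full function might look like, assuming simple magnitude-based pruning
# against a fixed cutoff; `pruning_threshold` is a hypothetical parameter and
# the actual pruning criterion is not shown in the original.
import numpy as np
import torch


def prune_weights(weights, pruning_threshold=0.01):
    weights = weights.cpu().numpy()
    weightshape = weights.shape
    flat = weights.reshape(-1)
    # Keep weights whose magnitude is at or above the threshold.
    masks = (np.abs(flat) >= pruning_threshold).astype(np.float32)
    num = flat.size                 # total weights in the layer
    count = int(num - masks.sum())  # weights pruned to zero
    maskcount = int(masks.sum())    # weights kept
    masks = masks.reshape(weightshape)
    weights = masks * weights
    return torch.from_numpy(weights).cuda(), masks, num, count, maskcount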
        # (inside the epoch / mini-batch loop; see the context sketch below)
        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 100 == 99:  # print every 100 mini-batches
            # Average over the 100 batches accumulated since the last reset
            # (the original divided by 2000, a leftover from a 2000-batch
            # reporting interval).
            print('[%d, %5d] loss: %.8f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

# After training: evaluate on the held-out test set.
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' %
      (100 * correct / total))

print('Finished Training')

PATH = './cifar_net.pth'
torch.save(net.state_dict(), PATH)
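# The fragment above begins inside the training loop. A sketch of the
# surrounding setup it assumes, modeled on the standard PyTorch CIFAR-10
# recipe; `Net`, `trainloader`, and `testloader` are defined elsewhere, and
# the learning rate and epoch count here are illustrative.
import torch
import torch.nn as nn
import torch.optim as optim

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = Net().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(10):
    running_loss = 0.0
    for i, data in enumerate(trainloader):
        inputs, labels = data[0].to(device), data[1].to(device)
        # Clear gradients before the fragment's forward/backward pass.
        optimizer.zero_grad()
        # ... forward / backward / statistics block from above goes here ...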
cuda = torch.cuda.is_available()
model = VGG()
if cuda:
    model.cuda()

trainer = OptimizerCIFAR10(model,
                           epochs=args.epochs,
                           expand_interval=args.exp_itv,
                           log_interval=args.log_itv,
                           report_interval=args.rep_itv,
                           expand_rate=args.exp_rate,
                           prune_rate=args.pr_rate,
                           lr=args.lr,
                           weight_decay=0,
                           cuda=cuda)
# print("exp_rate: ", trainer.expand_rate)

# Encode the experiment configuration in the run name.
# name = str(model.layer_count) + "_layers_" + "_extend_" + str(trainer.extend_threshold) + "_prune_" + str(trainer.prune_threshold) + "_Adam"
name = (str(model.layer_count) + "_layers_"
        + "_expand_" + str(trainer.expand_rate)
        + "_prune_" + str(trainer.prune_rate)
        + "_interval_" + str(trainer.expand_interval)
        + "_lr_" + str(trainer.lr) + "_selu")
folder = "./experiments/CIFAR10/" + name

logger = Logger(folder)
trainer.logger = logger
trainer.train()

torch.save(model.state_dict(),
           folder + "/model_trained_for_" + str(trainer.epochs) + "_epochs")
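# A short sketch of reloading the weights saved above for evaluation. The
# file name mirrors the save call; `folder` and `trainer` are the objects
# from the script above, and `map_location='cpu'` lets the checkpoint load
# on a machine without a GPU.
model = VGG()
state = torch.load(folder + "/model_trained_for_" + str(trainer.epochs)
                   + "_epochs", map_location='cpu')
model.load_state_dict(state)
model.eval()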
def train(self):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = VGG(self.model_type, True).to(device)
    loss_function = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=self.learning_rate,
                                weight_decay=5e-4)
    start_epoch = 0
    end_epoch = self.epochs
    average_loss_list = []
    writer = SummaryWriter('logs')

    # Optionally resume from a checkpoint and replay the logged losses.
    if self.load_model_dir is not None:
        checkpoint = torch.load(self.load_model_dir)
        model.load_state_dict(checkpoint['model_state_dict'])
        start_epoch += checkpoint['epoch']
        end_epoch += checkpoint['epoch']
        average_loss_list = checkpoint['average_loss_list']
        for idx, loss in enumerate(average_loss_list):
            writer.add_scalar("Training Loss Average", loss, idx + 1)

    mean, std = self.compute_mean_std(
        datasets.CIFAR100(self.dataset_dir, train=True, download=True))
    milestone = [60, 120, 160, 180, 200, 220]
    transform_ops = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomResizedCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
    train_dataset = datasets.CIFAR100(self.dataset_dir, train=True,
                                      download=True, transform=transform_ops)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=self.batch_size,
                                               shuffle=True,
                                               num_workers=self.num_worker)
    train_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=milestone, gamma=0.2)

    running_loss = 0
    running_idx = 0
    running_temp_idx = 0
    for epoch in range(start_epoch, end_epoch):
        # test() below switches the model to eval mode, so re-enable
        # training mode at the start of every epoch.
        model.train()
        total_loss = 0
        for batch_idx, (x, y) in enumerate(train_loader):
            # The deprecated Variable(x.cuda()) wrappers are replaced by
            # .to(device), which also works on CPU-only machines.
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            predicted = model(x)
            loss = loss_function(predicted, y)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            running_loss += loss.item()
            running_temp_idx += 1
            if batch_idx % 100 == 0:
                print('Train Epoch: {} {:.2f}% Percent Finished. '
                      'Current Loss: {:.6f}'.format(
                          epoch + 1, 100 * batch_idx / len(train_loader),
                          total_loss))
            if running_temp_idx % 50 == 0:
                running_idx += 1
                # Average over the 50 batches accumulated since the last
                # reset (the original divided by 100 and reset a misspelled
                # `runninng_temp_idx`, so the counter never cleared).
                writer.add_scalar("Running Loss", running_loss / 50,
                                  running_idx)
                running_temp_idx = 0
                running_loss = 0

        writer.add_scalar("Training Loss Average",
                          total_loss / len(train_loader), epoch + 1)
        print('Epoch {} Finished! Total Loss: {:.2f}'.format(
            epoch + 1, total_loss))
        print("---------------Test Initialized!------------------")
        accuracy = test("", 128, model=model)
        writer.add_scalar("Test Accuracy", accuracy, epoch + 1)
        train_scheduler.step()
        average_loss_list.append(total_loss / len(train_loader))

        # Keep a numbered checkpoint every 50 epochs and overwrite the
        # latest-weights file every epoch.
        if (epoch + 1) % 50 == 0:
            torch.save(
                {
                    'epoch': epoch + 1,
                    'model_state_dict': model.state_dict(),
                    'average_loss_list': average_loss_list,
                    'model_type': self.model_type
                }, self.model_save_dir +
                "vgg-checkpoint-{}.pth".format(epoch + 1))
        torch.save(
            {
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'average_loss_list': average_loss_list,
                'model_type': self.model_type
            }, self.model_save_dir + "vgg.pth")
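# compute_mean_std() is referenced above but not defined in this excerpt. A
# minimal sketch of one common implementation, assuming it is a method of the
# same class and that the dataset (loaded without a transform) yields PIL
# images; the per-channel statistics are computed over the full training set.
import numpy as np


def compute_mean_std(self, dataset):
    # Stack all images into one (N, H, W, C) array scaled to [0, 1].
    data = np.stack([np.asarray(img) for img, _ in dataset]) / 255.0
    mean = data.mean(axis=(0, 1, 2))
    std = data.std(axis=(0, 1, 2))
    return mean, std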
def main():
    best_acc = 0
    start_epoch = args.start_epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    trainloader = getdata(args, train=True)
    testloader = getdata(args, train=False)

    model = VGG(args.attention, args.nclass)
    if args.gpu:
        if torch.cuda.is_available():
            model = model.cuda()
            cudnn.benchmark = True
        else:
            print('There is no cuda available on this machine; '
                  'using cpu instead.')
            args.gpu = False

    criterion = nn.CrossEntropyLoss()
    optimizer = None
    if args.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr,
                               weight_decay=args.weight_decay)
    else:
        print(args.optimizer, 'is not a supported optimizer')
        return

    title = 'cifar-10-' + args.attention
    if args.evaluate:
        print('\nEvaluation only')
        assert os.path.isfile(args.evaluate), 'Error: no checkpoint found!'
        checkpoint = torch.load(args.evaluate)
        model.load_state_dict(checkpoint['state_dict'])
        test_loss, test_acc = test(model, testloader, criterion, args.gpu)
        print(' Test Loss: %.8f, Test Acc: %.2f' % (test_loss, test_acc))
        return

    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint,
                                     state['attention'] + '-' + 'log.txt'),
                        title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint,
                                     state['attention'] + '-' + 'log.txt'),
                        title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss',
                          'Train Acc.', 'Valid Acc.'])

    for epoch in range(start_epoch, args.epochs):
        start_time = time.time()
        adjust_learning_rate(optimizer, epoch)

        train_loss, train_acc = train(model, trainloader, criterion,
                                      optimizer, epoch, args.gpu)
        test_loss, test_acc = test(model, testloader, criterion, args.gpu)

        # On Python 3 the accuracies come back as tensors; convert to floats.
        if sys.version[0] == '3':
            train_acc = train_acc.cpu().numpy().tolist()[0]
            test_acc = test_acc.cpu().numpy().tolist()[0]

        logger.append([state['lr'], train_loss, test_loss,
                       train_acc, test_acc])

        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
                'attention': state['attention'],
            }, is_best, checkpoint=args.checkpoint)

        print(time.time() - start_time)
        print("epoch: {:3d}, lr: {:.8f}, train-loss: {:.3f}, "
              "test-loss: {:.3f}, train-acc: {:2.3f}, test-acc: {:2.3f}"
              .format(epoch, state['lr'], train_loss, test_loss,
                      train_acc, test_acc))

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint,
                         state['attention'] + '-' + 'log.eps'))

    print('Best acc:', best_acc)
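# adjust_learning_rate() is called in main() but not defined in this excerpt.
# A minimal sketch of the usual step-decay variant, assuming an
# `args.schedule` list of decay epochs and an `args.gamma` factor (both
# hypothetical names); it updates the shared `state['lr']` that main() logs.
def adjust_learning_rate(optimizer, epoch):
    global state
    if epoch in args.schedule:
        # Multiply the learning rate by gamma at each scheduled epoch.
        state['lr'] *= args.gamma
        for param_group in optimizer.param_groups:
            param_group['lr'] = state['lr']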