def main():
    """Load the retrained pruned VGG-16 checkpoint and print sparsity reports.

    The checkpoint is loaded from a fixed path, the network is moved to the
    GPU and passed to ``validate`` together with the module-level
    ``test_loader``.  Afterwards every 4-D parameter whose name does not
    contain ``"shortcut"`` is listed with its size, and the column and filter
    sparsity helpers are called on the network.
    """
    checkpoint_path = (
        "./model_pruned/2019-04-09 11:14:52.016169/"
        "column-filter-fivelabels-masked_retrain/"
        "cifar10_vgg16_retrained_acc_93.960_4rhos_config_vgg16_v2.yaml.pt"
    )

    # Alternatives previously tried at this point (kept for reference):
    # VGG_shaokai("vgg16"), ConvNet(), ResNet18(), and wrapping the model in
    # torch.nn.DataParallel.
    net = VGG(depth=16, init_weights=True, cfg=None)
    net.load_state_dict(torch.load(checkpoint_path))
    net.cuda()

    # A CrossEntropyLossMaybeSmooth(smooth_eps=0) criterion and the test()
    # helper were used here before; the plain functional loss is used now.
    loss_fn = F.cross_entropy
    validate(test_loader, loss_fn, net)

    print("\n------------------------------\n")
    print('here')
    for param_name, tensor in net.named_parameters():
        # Only 4-D tensors outside of shortcut branches are reported.
        if "shortcut" in param_name or len(tensor.size()) != 4:
            continue
        print(param_name, tensor.size())
    print('here now')

    test_column_sparsity(net)
    # test_chanel_sparsity(net) is intentionally not run.
    test_filter_sparsity(net)
def get_vgg_net(model_folder, out_keys=None):
    """Build a frozen VGG network on the GPU from ``vgg_conv.pth``.

    Args:
        model_folder: Path prefix of the weights file.  It is concatenated
            directly with ``'vgg_conv.pth'``, so it must already end with a
            path separator.
        out_keys: Keys forwarded to ``VGG(out_keys=...)``.  When omitted, a
            fresh ``['r11', 'r21', 'r31', 'r41', 'r51']`` list is used.

    Returns:
        The ``VGG`` instance with the loaded weights, moved to CUDA, with
        ``requires_grad`` disabled on every parameter.
    """
    # A list literal as the default would be created once and shared by every
    # call (and by whatever VGG keeps a reference to); build it per call.
    if out_keys is None:
        out_keys = ['r11', 'r21', 'r31', 'r41', 'r51']

    vgg_net = VGG(pool='avg', out_keys=out_keys)
    vgg_net.load_state_dict(torch.load(model_folder + 'vgg_conv.pth'))
    vgg_net.cuda()

    # The network is used with fixed weights: turn off gradient tracking.
    for param in vgg_net.parameters():
        param.requires_grad = False
    return vgg_net
contentImg = Variable(torch.from_numpy(contentImg)) else: styleImg = load_image(opt.style_image) # 1x3x512x512 contentImg = load_image(opt.content_image) # 1x3x512x512 if (opt.cuda): styleImg = styleImg.cuda() contentImg = contentImg.cuda() ############### MODEL #################### vgg = VGG() vgg.load_state_dict(torch.load(opt.vgg_dir)) for param in vgg.parameters(): param.requires_grad = False if (opt.cuda): vgg.cuda() ########### LOSS & OPTIMIZER ########## class GramMatrix(nn.Module): def forward(self, input): b, c, h, w = input.size() f = input.view(b, c, h * w) # bxcx(hxw) # torch.bmm(batch1, batch2, out=None) # # batch1: bxmxp, batch2: bxpxn -> bxmxn # G = torch.bmm(f, f.transpose( 1, 2)) # f: bxcx(hxw), f.transpose: bx(hxw)xc -> bxcxc return G.div_(h * w) class styleLoss(nn.Module):
content_img = "./2.jpg" styleImg = load_img(style_img) contentImg = load_img(content_img) #for running on cuda styleImg = styleImg.cuda() contentImg = contentImg.cuda() vgg_directory = "./vgg_conv.pth" #path to pretrained vgg vgg_directory vgg = VGG() #print(vgg.state_dict()) vgg.load_state_dict(torch.load(vgg_directory)) for param in vgg.parameters(): param.requires_grad = False vgg.cuda() # Putting model on cuda class GramMatrix(nn.Module): def forward(self, input): b, c, h, w = input.size() f = input.view(b, c, h * w) #bxcx(hxw) # torch.bmm(batch1, batch2, out=None) # batch1 : bxmxp, batch2 : bxpxn -> bxmxn G = torch.bmm(f, f.transpose( 1, 2)) # f: bxcx(hxw), f.transpose: bx(hxw)xc -> bxcxc return G.div_(h * w) class styleLoss(nn.Module): def forward(self, input, target):
def train():
    """Train a VGG model on CIFAR-10, or only evaluate it when ``args.eval``.

    Reads its configuration from the module-level ``args`` (``batch_size``,
    ``lrate``, ``use_cuda``, ``eval``, ``model_dir``, ``epochs``,
    ``decay_period``, ``decay``).

    In evaluation mode the weights at ``args.model_dir`` are loaded,
    ``model.evaluate`` is run on the test loader and the process exits.

    In training mode, after every epoch the model is evaluated, the list of
    per-epoch scores is pickled to ``args.model_dir + "_scores.pkl"``, the
    weights are saved to ``args.model_dir`` whenever the score improves, and
    the learning rate is multiplied by ``args.decay`` every
    ``args.decay_period`` epochs.

    Raises:
        SystemExit: after evaluation when ``args.eval`` is set.
    """
    import sys  # local import so this block does not depend on the file header

    channel_mean = (0.4914, 0.4822, 0.4465)
    channel_std = (0.2023, 0.1994, 0.2010)

    transform_train = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        # NOTE(review): CIFAR-10 images are presumably already 32x32, in which
        # case this crop is a no-op; confirm whether padding was intended.
        transforms.RandomCrop(size=32),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])

    trainset = torchvision.datasets.CIFAR10(
        root='./data', train=True, download=False, transform=transform_train)
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=args.batch_size, shuffle=True, num_workers=2)
    testset = torchvision.datasets.CIFAR10(
        root='./data', train=False, download=False, transform=transform_test)
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=args.batch_size, shuffle=True, num_workers=2)

    model = VGG(vars(args))
    optimizer = torch.optim.SGD(
        model.parameters(), lr=args.lrate, momentum=0.9, weight_decay=5e-4)
    if args.use_cuda:
        model = model.cuda()

    if args.eval:
        model.load_state_dict(torch.load(args.model_dir))
        model.eval()
        model.evaluate(testloader)
        # sys.exit() instead of the interactive-only exit() helper, which is
        # not guaranteed to exist (e.g. when Python runs with -S).
        sys.exit()

    total_size = len(trainloader)
    lrate = args.lrate
    best_score = 0.0
    scores = []
    for epoch in range(1, args.epochs + 1):
        model.train()
        for i, (image, label) in enumerate(trainloader):
            # The model's forward pass returns the loss directly.
            loss = model(image, label)
            # The optimizer was built from model.parameters(), so zeroing it
            # clears every gradient of the model.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if i % 100 == 0:
                print('Epoch = %d, step = %d / %d, loss = %.5f lrate = %.5f'
                      % (epoch, i, total_size, loss, lrate))

        model.eval()
        accuracy = model.evaluate(testloader)
        scores.append(accuracy)
        # The full score history is rewritten after every epoch.
        with open(args.model_dir + "_scores.pkl", "wb") as f:
            pkl.dump(scores, f)

        if best_score < accuracy:
            best_score = accuracy
            print('saving %s ...' % args.model_dir)
            torch.save(model.state_dict(), args.model_dir)

        if epoch % args.decay_period == 0:
            # Step decay: shrink the rate and push it into the optimizer.
            lrate *= args.decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lrate
model = VGG(depth=19, init_weights=True, cfg=None) else: sys.exit("vgg doesn't have those depth!") elif args.arch == "resnet": if args.depth == 18: model = ResNet18() elif args.depth == 50: model = ResNet50() else: sys.exit("resnet doesn't implement those depth!") # elif args.arch == "convnet": # args.depth = 4 # model = ConvNet() if args.multi_gpu: model = torch.nn.DataParallel(model) model.cuda() print(args.distill) if args.cuda and args.distill: # crete teacher model. if args.teacharch == "vgg": if args.teachdepth == 16: teacher = VGG(depth=16, init_weights=True, cfg=None) elif args.teachdepth == 19: teacher = VGG(depth=19, init_weights=True, cfg=None) else: sys.exit("vgg doesn't have those depth!") elif args.teacharch == "resnet": if args.teachdepth == 18: teacher = ResNet18() elif args.teachdepth == 50:
# parser.add_argument('--seed', type=int, default=1) parser.add_argument('--device-id', type=int, default=1) parser.add_argument('--log-interval', type=int, default=20) # model directory and name parser.add_argument('--model-dir', type=str, default="../models/TinyImageNet200/VGG19") parser.add_argument('--model-name', type=str, default="vgg19") args = parser.parse_args() return args if __name__ == "__main__": args = set_args() # Config model and gpu torch.manual_seed(args.seed) # model = DenseNet121(num_classes=imagenet_num_class) # model = ResNet50(num_classes=imagenet_num_class) model = VGG("VGG19", imagenet_num_class) args.cuda = torch.cuda.is_available() if args.cuda: torch.cuda.manual_seed(args.seed) model.cuda(args.device_id) import torch.backends.cudnn as cudnn cudnn.benchmark = True # Start training train_imagenet(model, args)
# Remaining command-line options, registered in this order from a
# (flag, default, type, help) table.
for _flag, _default, _kind, _help in (
    ('--lr', 0.001, float, 'learning rate'),
    ('--exp_itv', 200, int, 'expand interval'),
    ('--log_itv', 400, int, 'tensorboard logging interval'),
    ('--rep_itv', 10, int, 'command line reporting interval'),
    ('--exp_rate', 4, int, 'expand rate'),
    ('--pr_rate', 0, int, 'prune rate'),
):
    parser.add_argument(_flag, default=_default, type=_kind, help=_help)

args = parser.parse_args()
print(args)

# Use the GPU whenever one is available.
cuda = torch.cuda.is_available()
model = VGG()
if cuda:
    model.cuda()

# Trainer settings taken from the parsed options; weight decay is disabled.
_trainer_kwargs = {
    'epochs': args.epochs,
    'expand_interval': args.exp_itv,
    'log_interval': args.log_itv,
    'report_interval': args.rep_itv,
    'expand_rate': args.exp_rate,
    'prune_rate': args.pr_rate,
    'lr': args.lr,
    'weight_decay': 0,
    'cuda': cuda,
}
trainer = OptimizerCIFAR10(model, **_trainer_kwargs)
#print("exp_rate: ", trainer.expand_rate)
#name = str(model.layer_count) + "_layers_" + "_extend_" + str(trainer.extend_threshold) + "_prune_" + str(trainer.prune_threshold) + "_Adam"
]), } image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in [train, val]} dataloders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=8, shuffle=True, num_workers=4) for x in [train, val]} dataset_sizes = {x: len(image_datasets[x]) for x in ['train', val]} use_gpu = torch.cuda.is_available() model = VGG(2) if os.path.exists(save_path): model.load_state_dict(torch.load(save_path)) if use_gpu: model = model.cuda() criterion = nn.CrossEntropyLoss() optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.95) scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5) print ('*' * 10) print ('start training') trainval(dataloders, model, optimizer, scheduler, criterion, dataset_sizes, phase='train')
std=[0.229, 0.224, 0.225]) testset = torchvision.datasets.CIFAR10(root='data/', train=False, download=True, transform=transforms.Compose([ transforms.ToTensor(), normalize, ])) testloader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=False, num_workers=2) # Load the conv model net = VGG('VGG16') net = net.cuda() criterion_no_constrain = nn.CrossEntropyLoss() net.load_state_dict(torch.load(model_path)) net.eval() store_feat_maps(net) # store all feature maps and max pooling locations # # Build the deconv net net_decocnv = _vgg16Deconv() for idx, layer in enumerate(net.features): if isinstance(layer, nn.Conv2d): net_decocnv.features[net_decocnv.conv2deconv_indices[ idx]].weight.data = layer.weight.data if idx in net_decocnv.conv2deconv_bias_indices: net_decocnv.features[net_decocnv.conv2deconv_bias_indices[ idx]].bias.data = layer.bias.data
contentImg = Variable(torch.from_numpy(contentImg)) else: styleImg = load_image(opt.style_image) # 1x3x512x512 contentImg = load_image(opt.content_image) # 1x3x512x512 if(opt.cuda): styleImg = styleImg.cuda() contentImg = contentImg.cuda() ############### MODEL #################### vgg = VGG() vgg.load_state_dict(torch.load(opt.vgg_dir)) for param in vgg.parameters(): param.requires_grad = False if(opt.cuda): vgg.cuda() ########### LOSS & OPTIMIZER ########## class GramMatrix(nn.Module): def forward(self,input): b, c, h, w = input.size() f = input.view(b,c,h*w) # bxcx(hxw) # torch.bmm(batch1, batch2, out=None) # # batch1: bxmxp, batch2: bxpxn -> bxmxn # G = torch.bmm(f,f.transpose(1,2)) # f: bxcx(hxw), f.transpose: bx(hxw)xc -> bxcxc return G.div_(h*w) class styleLoss(nn.Module): def forward(self,input,target): GramInput = GramMatrix()(input) return nn.MSELoss()(GramInput,target)
imgs = imgs[mixer] labels = labels[mixer] trainset = torch.utils.data.TensorDataset(imgs[:-10000], labels[:-10000]) trainloader = torch.utils.data.DataLoader(trainset, batch_size=100, shuffle=True, num_workers=2) testset = torch.utils.data.TensorDataset(imgs[-10000:], labels[-10000:]) testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2) # Model print('==> Building model..') net = VGG('VGG11') if use_cuda: net.cuda() net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count())) cudnn.benchmark = True criterion = nn.CrossEntropyLoss() optimizer = optim.Adadelta(net.parameters(), lr=0.01) # Training def train(epoch): print('\nEpoch: %d' % epoch) net.train() train_loss = 0 correct = 0 total = 0 for batch_idx, (inputs, targets) in enumerate(trainloader): if use_cuda: inputs, targets = inputs.cuda(), targets.cuda()
def main():
    """Train, resume or evaluate the attention VGG classifier.

    Configuration comes from the module-level ``args``; the module-level
    ``state`` dict supplies the ``'attention'`` and ``'lr'`` values used for
    logging and checkpoints.

    * With ``args.evaluate`` set, the checkpoint at that path is loaded, the
      test set is scored once and the function returns.
    * With ``args.resume`` set, model, optimizer, best accuracy and epoch are
      restored from that checkpoint and the existing log is appended to.
    * Otherwise training starts at ``args.start_epoch`` with a new log.

    After every epoch a checkpoint is written through ``save_checkpoint``;
    when training ends the log is closed, plotted and saved as an ``.eps``
    figure in ``args.checkpoint``.

    Raises:
        AssertionError: if ``args.evaluate`` / ``args.resume`` is set but is
            not an existing file.
    """
    best_acc = 0
    start_epoch = args.start_epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    trainloader = getdata(args, train=True)
    testloader = getdata(args, train=False)

    model = VGG(args.attention, args.nclass)

    if args.gpu:
        if torch.cuda.is_available():
            model = model.cuda()
            cudnn.benchmark = True
        else:
            print(
                'There is no cuda available on this machine use cpu instead.')
            # Fall back to CPU; everything below keys off args.gpu.
            args.gpu = False

    # Checkpoints written on a GPU cannot be deserialized on a CPU-only host
    # unless their storages are remapped, so remap whenever the GPU is unused.
    map_location = None if args.gpu else 'cpu'

    criterion = nn.CrossEntropyLoss()
    if args.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)
    else:
        print(args.optimizer, 'is not correct')
        return

    title = 'cifar-10-' + args.attention

    if args.evaluate:
        print('\nEvaluation only')
        assert os.path.isfile(
            args.evaluate), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.evaluate, map_location=map_location)
        model.load_state_dict(checkpoint['state_dict'])
        test_loss, test_acc = test(model, testloader, criterion, args.gpu)
        print(' Test Loss: %.8f, Test Acc: %.2f' % (test_loss, test_acc))
        return

    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        # From here on, logs and checkpoints go next to the resumed file.
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume, map_location=map_location)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint,
                                     state['attention'] + '-' + 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint,
                                     state['attention'] + '-' + 'log.txt'),
                        title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
            'Valid Acc.'
        ])

    for epoch in range(start_epoch, args.epochs):
        start_time = time.time()
        adjust_learning_rate(optimizer, epoch)
        train_loss, train_acc = train(model, trainloader, criterion, optimizer,
                                      epoch, args.gpu)
        test_loss, test_acc = test(model, testloader, criterion, args.gpu)
        if sys.version[0] == '3':
            # NOTE(review): assumes the accuracies are tensors with at least
            # one dimension; a 0-dim tensor would break the [0] index.
            train_acc = train_acc.cpu().numpy().tolist()[0]
            test_acc = test_acc.cpu().numpy().tolist()[0]
        logger.append(
            [state['lr'], train_loss, test_loss, train_acc, test_acc])

        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint(
            {
                # Stored as the epoch to start from when resuming.
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
                'attention': state['attention'],
            },
            is_best,
            checkpoint=args.checkpoint)
        print(time.time() - start_time)
        print(
            "epoch: {:3d}, lr: {:.8f}, train-loss: {:.3f}, test-loss: {:.3f}, train-acc: {:2.3f}, test_acc:, {:2.3f}"
            .format(epoch, state['lr'], train_loss, test_loss, train_acc,
                    test_acc))

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, state['attention'] + '-' + 'log.eps'))

    print('Best acc:', best_acc)