def train(train_queue, valid_queue, model, architect, criterion, optimizer,
          optimizer2, lr, lr2, model2, epoch):
    # NOTE: this definition is shadowed by the second `train` below, which is
    # the one main() actually calls. It is kept for reference: it updates the
    # search model, then trains model2 on inputs masked by a quantized PGD
    # perturbation.
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        architect.step(input, target, input_search, target_search, lr,
                       optimizer, unrolled=args.unrolled)

        optimizer.zero_grad()
        architect.optimizer.zero_grad()

        # Update the supernet weights with the architecture parameters
        # temporarily replaced by their softmax.
        model.softmax_arch_parameters()
        logits = model(input, updateType='weight')
        loss = criterion(logits, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()
        model.restore_arch_parameters()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
        if 'debug' in args.save:
            break

        # Attack the search model, quantize |diff| into a coarse integer mask
        # (the epoch-dependent term slowly lowers the rounding threshold over
        # 300 epochs), and train model2 on the masked inputs.
        model_adv = AttackPGD(model)
        logits1, diff, x = model_adv(input, target)
        deltas = torch.round(torch.abs(diff) * 255 / 8 + 0.499 - (epoch / 300))
        pert_inp = torch.mul(input, deltas)

        model2.train()
        optimizer2.zero_grad()
        logits2 = model2(pert_inp)
        loss2 = criterion(logits2, target)
        loss2.backward()
        nn.utils.clip_grad_norm_(model2.parameters(), args.grad_clip)
        optimizer2.step()

    return top1.avg, objs.avg
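# AttackPGD is used throughout this file but defined elsewhere. The sketch
# below is a guess at its shape, assuming a standard L-inf PGD attack on
# inputs in [0, 1] and the (logits, diff, x_adv) return signature seen above;
# the class name, epsilon, step size, and step count are illustrative
# assumptions, not the project's actual values.
class AttackPGDSketch(nn.Module):
    """Hypothetical L-inf PGD wrapper returning (logits, perturbation, adv input)."""

    def __init__(self, model, epsilon=8 / 255, step_size=2 / 255, num_steps=7):
        super().__init__()
        self.model = model
        self.epsilon = epsilon
        self.step_size = step_size
        self.num_steps = num_steps

    def forward(self, x, target):
        # Random start inside the epsilon ball, then projected gradient ascent
        # on the cross-entropy loss.
        x_adv = x.detach() + torch.empty_like(x).uniform_(-self.epsilon, self.epsilon)
        for _ in range(self.num_steps):
            x_adv.requires_grad_(True)
            loss = F.cross_entropy(self.model(x_adv), target)
            grad = torch.autograd.grad(loss, x_adv)[0]
            x_adv = x_adv.detach() + self.step_size * grad.sign()
            # Project back into the epsilon ball and the valid pixel range.
            x_adv = torch.min(torch.max(x_adv, x - self.epsilon), x + self.epsilon)
            x_adv = torch.clamp(x_adv, 0.0, 1.0)
        logits = self.model(x_adv)
        return logits, x_adv - x, x_adv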
def train3(train_queue, valid_queue, model, architect, criterion, optimizer, lr,
           perturb_alpha, epsilon_alpha, model2, epoch):
    # One search epoch that also records the PGD perturbation of the last
    # full-sized batch; the caller can reuse it (see the train4 reference in
    # main()).
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    # Placeholder sized for full batches of CIFAR images (batch_size = 64).
    delta = torch.empty(64, 3, 32, 32)

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        architect.step(input, target, input_search, target_search, lr,
                       optimizer, unrolled=args.unrolled)
        architect.optimizer.zero_grad()

        # Supernet weight update on the clean batch.
        logits = model(input, updateType='weight')
        loss = criterion(logits, target)
        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        # Adversarial pass: also updates the supernet weights on the PGD loss.
        model_adv = AttackPGD(model)
        logits1, diff, x = model_adv(input, target)
        loss1 = criterion(logits1, target)
        optimizer.zero_grad()
        loss1.backward()
        optimizer.step()

        # Keep only perturbations from full batches; stop at the ragged final
        # batch.
        if diff.size() != delta.size():
            print(list(diff.size()))
            print(list(input.size()))
            break
        delta = diff

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
        if 'debug' in args.save:
            break

    return top1.avg, objs.avg, delta
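# main() below references a train4 that consumes the delta returned by train3;
# its definition is not in this section. A minimal sketch of such a consumer,
# assuming delta is reused as a fixed multiplicative mask for model2 and
# mirroring the second loop of train2 below; train4_sketch and optimizer2 are
# illustrative names, not the project's actual ones.
def train4_sketch(train_queue, model2, criterion, optimizer2, delta):
    model2.train()
    train_loss, correct, total, max_step = 0.0, 0, 0, 0
    for step, (input, target) in enumerate(train_queue):
        input = input.cuda()
        target = target.cuda(non_blocking=True)
        # Skip any ragged batch whose size does not match delta.
        if input.size() != delta.size():
            continue
        optimizer2.zero_grad()
        logits2 = model2(torch.mul(input, delta))
        loss2 = criterion(logits2, target)
        loss2.backward()
        nn.utils.clip_grad_norm_(model2.parameters(), args.grad_clip)
        optimizer2.step()
        train_loss += loss2.item()
        _, predicted = logits2.max(1)
        total += target.size(0)
        correct += predicted.eq(target).sum().item()
        max_step = step
    return 100. * correct / total, train_loss / (max_step + 1)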
def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr,
          perturb_alpha, epsilon_alpha, model2, epoch):
    # This is the train() that main() calls: one architecture/weight search
    # step per batch, followed by a PGD pass on the search model and a model2
    # update on the multiplicatively perturbed inputs.
    train_loss = 0
    correct = 0
    total = 0
    max_step = 0

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        architect.step(input, target, input_search, target_search, lr,
                       optimizer, unrolled=args.unrolled)
        architect.optimizer.zero_grad()

        # Supernet weight update on the clean batch.
        logits = model(input, updateType='weight')
        loss = criterion(logits, target)
        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        # Adversarial pass on the search model.
        model_adv = AttackPGD(model)
        logits1, diff, x = model_adv(input, target)
        loss1 = criterion(logits1, target)
        optimizer.zero_grad()
        loss1.backward()
        optimizer.step()

        # Train model2 on the input scaled elementwise by the perturbation.
        # NOTE: the step below uses optimizer, which holds model.parameters();
        # an optimizer over model2.parameters() (cf. optimizer2 in the shadowed
        # train() above) appears to be intended, otherwise model2 is never
        # updated.
        pert_inp = torch.mul(input, diff)
        logits2 = model2(pert_inp)
        loss2 = criterion(logits2, target)
        loss2.backward()
        nn.utils.clip_grad_norm_(model2.parameters(), args.grad_clip)
        optimizer.step()

        train_loss += loss2.item()
        _, predicted = logits2.max(1)
        total += target.size(0)
        correct += predicted.eq(target).sum().item()
        max_step = step

        progress_bar(step, len(train_queue),
                     'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                     % (train_loss / (step + 1), 100. * correct / total,
                        correct, total))

    return 100. * correct / total, train_loss / (max_step + 1)
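# An aside on the multiplicative perturbation above: for an L-inf PGD attack,
# diff lies in [-epsilon, epsilon], so input * diff shrinks the input by
# roughly a factor of epsilon, whereas the first (shadowed) train() rounds
# |diff| into an integer mask before multiplying. The hypothetical helper
# below, assuming epsilon = 8/255, makes the scale difference easy to inspect;
# it is a debugging aid, not part of the pipeline.
def _inspect_pert_scale(input, diff, epoch=0):
    """Compare magnitudes of the raw and perturbed batches."""
    raw = torch.mul(input, diff)                    # as in train() above
    mask = torch.round(torch.abs(diff) * 255 / 8 + 0.499 - (epoch / 300))
    masked = torch.mul(input, mask)                 # as in the shadowed train()
    print('|input|        mean: %.4f' % input.abs().mean().item())
    print('|input * diff| mean: %.4f' % raw.abs().mean().item())
    print('|input * mask| mean: %.4f' % masked.abs().mean().item())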
def train2(train_queue, valid_queue, model, architect, criterion, optimizer, lr,
           perturb_alpha, epsilon_alpha, model2, epoch):
    # Two-phase variant: the first loop runs the search and records the PGD
    # perturbation of the last full batch; the second loop trains model2 with
    # that single perturbation applied to every batch.
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    train_loss = 0
    correct = 0
    total = 0
    max_step = 0
    # Initialize delta so the size check below is well defined on the first
    # iteration (the original left this commented out, which raises NameError).
    delta = torch.empty(64, 3, 32, 32)

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        architect.step(input, target, input_search, target_search, lr,
                       optimizer, unrolled=args.unrolled)
        architect.optimizer.zero_grad()

        # Supernet weight update on the clean batch.
        logits = model(input, updateType='weight')
        loss = criterion(logits, target)
        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        # Adversarial pass on the search model.
        model_adv = AttackPGD(model)
        logits1, diff, x = model_adv(input, target)
        loss1 = criterion(logits1, target)
        optimizer.zero_grad()
        loss1.backward()
        optimizer.step()

        # Keep only perturbations from full batches.
        if diff.size() != delta.size():
            print(list(diff.size()))
            print(list(input.size()))
            break
        delta = diff

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
        if 'debug' in args.save:
            break

    # Phase two: train model2 on inputs masked by the saved delta.
    for step, (input, target) in enumerate(train_queue):
        model2.train()  # the original called model.train() here; model2 is what trains
        n = input.size(0)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        if delta.size() != input.size():
            print(list(delta.size()))
            print(list(input.size()))
            break
        pert_inp = torch.mul(input, delta)
        logits2 = model2(pert_inp)
        loss2 = criterion(logits2, target)
        loss2.backward()
        # NOTE: as in train(), the step below uses optimizer, which holds
        # model.parameters(); a model2 optimizer appears to be intended.
        nn.utils.clip_grad_norm_(model2.parameters(), args.grad_clip)
        optimizer.step()

        train_loss += loss2.item()
        _, predicted = logits2.max(1)
        total += target.size(0)
        correct += predicted.eq(target).sum().item()
        max_step = step

        progress_bar(step, len(train_queue),
                     'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                     % (train_loss / (step + 1), 100. * correct / total,
                        correct, total))

    return 100. * correct / total, train_loss / (max_step + 1)
def main():
    torch.set_num_threads(3)
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    if args.perturb_alpha == 'none':
        perturb_alpha = None
    elif args.perturb_alpha == 'pgd_linf':
        perturb_alpha = Linf_PGD_alpha
    elif args.perturb_alpha == 'random':
        perturb_alpha = Random_alpha

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    # model2 is a plain ResNet-18 trained on the perturbed inputs.
    resnet18 = models.resnet18()
    resnet18 = resnet18.cuda()
    model2 = resnet18

    model = Network(args.init_channels, n_classes, args.layers, criterion,
                    spaces_dict[args.search_space])
    model = model.cuda()
    model_adv = AttackPGD(model)
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    if args.dataset == 'cifar10':
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
        train_data = dset.CIFAR10(root=args.data, train=True, download=True,
                                  transform=train_transform)
    elif args.dataset == 'cifar100':
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
        train_data = dset.CIFAR100(root=args.data, train=True, download=True,
                                   transform=train_transform)
    elif args.dataset == 'svhn':
        train_transform, valid_transform = utils._data_transforms_svhn(args)
        train_data = dset.SVHN(root=args.data, split='train', download=True,
                               transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]

        if args.cutout:
            # increase the cutout probability linearly throughout search
            train_transform.transforms[-1].cutout_prob = \
                args.cutout_prob * epoch / (args.epochs - 1)
            logging.info('epoch %d lr %e cutout_prob %e', epoch, lr,
                         train_transform.transforms[-1].cutout_prob)
        else:
            logging.info('epoch %d lr %e', epoch, lr)

        # args.perturb_alpha is a non-empty string, so this branch is always
        # taken (including for 'none'); epsilon_alpha ramps linearly from 0.03.
        if args.perturb_alpha:
            epsilon_alpha = 0.03 + (args.epsilon_alpha - 0.03) * epoch / args.epochs
            logging.info('epoch %d epsilon_alpha %e', epoch, epsilon_alpha)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training; alternative pipelines (train3 + train4 with delta reuse, or
        # train2) are kept above for reference
        train_acc, train_obj = train(train_queue, valid_queue, model, architect,
                                     criterion, optimizer, lr, perturb_alpha,
                                     epsilon_alpha, model2, epoch)
        logging.info('train_acc %f', train_acc)
        writer.add_scalar('Acc/train', train_acc, epoch)
        writer.add_scalar('Obj/train', train_obj, epoch)

        # validation: note this evaluates model2 (the ResNet-18), not the
        # search model
        valid_acc, valid_obj = infer(valid_queue, resnet18, criterion)
        logging.info('valid_acc %f', valid_acc)
        writer.add_scalar('Acc/valid', valid_acc, epoch)
        writer.add_scalar('Obj/valid', valid_obj, epoch)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

    writer.close()
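# infer() is called in main() but not defined in this section. A minimal
# sketch in the style of the DARTS reference implementation is given below;
# the project's actual helper may differ (e.g. it may pass updateType to the
# model), so infer_sketch is an illustrative name.
def infer_sketch(valid_queue, model, criterion):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()
    with torch.no_grad():
        for step, (input, target) in enumerate(valid_queue):
            input = input.cuda()
            target = target.cuda(non_blocking=True)
            logits = model(input)
            loss = criterion(logits, target)
            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = input.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)
            if step % args.report_freq == 0:
                logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
    return top1.avg, objs.avg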