def get_train_val_loaders(self):
    """Build train/validation DataLoaders by splitting the *training* set.

    The official training split is cut at ``args.train_portion``; the first
    part feeds ``train_queue`` and the remainder feeds ``valid_queue``.  Both
    loaders draw from the same ``train_data`` object (so both see
    ``train_transform``-augmented images) via SubsetRandomSampler.

    Returns:
        (train_queue, valid_queue, train_transform, valid_transform)

    Raises:
        ValueError: if ``self.args.dataset`` is not one of
            'cifar10', 'cifar100', 'svhn'.
    """
    if self.args.dataset == 'cifar10':
        train_transform, valid_transform = utils._data_transforms_cifar10(self.args)
        train_data = dset.CIFAR10(root=self.args.data, train=True,
                                  download=True, transform=train_transform)
    elif self.args.dataset == 'cifar100':
        train_transform, valid_transform = utils._data_transforms_cifar100(self.args)
        train_data = dset.CIFAR100(root=self.args.data, train=True,
                                   download=True, transform=train_transform)
    elif self.args.dataset == 'svhn':
        train_transform, valid_transform = utils._data_transforms_svhn(self.args)
        train_data = dset.SVHN(root=self.args.data, split='train',
                               download=True, transform=train_transform)
    else:
        # Previously an unrecognized dataset fell through the if/elif chain
        # and crashed below with a confusing NameError on `train_data`;
        # fail fast with a clear message instead.
        raise ValueError('Unsupported dataset: {}'.format(self.args.dataset))

    num_train = len(train_data)
    indices = list(range(num_train))
    # Index where the train portion ends and the validation portion begins.
    split = int(np.floor(self.args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=self.args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=self.args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    return train_queue, valid_queue, train_transform, valid_transform
def get_train_val_loaders(self):
    """Build train/validation DataLoaders for the configured dataset.

    Unlike the portion-split variant, this uses the official *train* split
    (shuffled, with ``train_transform``) for training and the held-out
    *test* split (unshuffled, with ``valid_transform``) for validation.

    Returns:
        (train_queue, valid_queue, train_transform, valid_transform)

    Raises:
        ValueError: if ``self.args.dataset`` is not one of
            'cifar10', 'cifar100', 'svhn'.
    """
    if self.args.dataset == 'cifar10':
        train_transform, valid_transform = utils._data_transforms_cifar10(self.args)
        train_data = dset.CIFAR10(root=self.args.data, train=True,
                                  download=True, transform=train_transform)
        valid_data = dset.CIFAR10(root=self.args.data, train=False,
                                  download=True, transform=valid_transform)
    elif self.args.dataset == 'cifar100':
        train_transform, valid_transform = utils._data_transforms_cifar100(self.args)
        train_data = dset.CIFAR100(root=self.args.data, train=True,
                                   download=True, transform=train_transform)
        valid_data = dset.CIFAR100(root=self.args.data, train=False,
                                   download=True, transform=valid_transform)
    elif self.args.dataset == 'svhn':
        train_transform, valid_transform = utils._data_transforms_svhn(self.args)
        train_data = dset.SVHN(root=self.args.data, split='train',
                               download=True, transform=train_transform)
        valid_data = dset.SVHN(root=self.args.data, split='test',
                               download=True, transform=valid_transform)
    else:
        # Previously an unrecognized dataset fell through the if/elif chain
        # and crashed below with a confusing NameError on `train_data`;
        # fail fast with a clear message instead.
        raise ValueError('Unsupported dataset: {}'.format(self.args.dataset))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=self.args.batch_size, shuffle=True,
        pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=self.args.batch_size, shuffle=False,
        pin_memory=True, num_workers=2)

    return train_queue, valid_queue, train_transform, valid_transform
def main():
    """Train a previously-sampled subnet from scratch and evaluate it.

    Loads an alpha matrix saved by an earlier evaluation run, extracts the
    corresponding subnet from a fresh supernet, trains it, then writes
    accuracy plus OOD-detection AUCs to `subnet_scratch.txt`.

    Relies on module-level globals: `args`, `unparsed`, `writer_va`,
    `global_step`, `CIFAR_CLASSES`, plus project helpers
    (`utils`, `Network`, `train`, `infer`, `ood_eval`).
    """
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    # Seed numpy and torch (CPU + CUDA) for reproducibility.
    np.random.seed(args.super_seed)
    cudnn.benchmark = True
    torch.manual_seed(args.super_seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.super_seed)
    logging.info("args = %s", args)
    logging.info("unparsed args = %s", unparsed)
    # prepare dataset (CIFAR-100 or CIFAR-10, selected by flag)
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True,
                                   download=True, transform=train_transform)
        valid_data = dset.CIFAR100(root=args.tmp_data_dir, train=False,
                                   download=True, transform=valid_transform)
    else:
        train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True,
                                  download=True, transform=train_transform)
        valid_data = dset.CIFAR10(root=args.tmp_data_dir, train=False,
                                  download=True, transform=valid_transform)
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True,
        pin_memory=True, num_workers=args.workers, drop_last=True)
    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size, shuffle=False,
        pin_memory=True, num_workers=args.workers, drop_last=True)
    # Out-of-distribution loaders; each directory is read as an ImageFolder
    # and transformed with the in-distribution validation transform.
    ood_queues = {}
    for k in ['svhn', 'lsun_resized', 'imnet_resized']:
        ood_path = os.path.join(args.ood_dir, k)
        dset_ = dset.ImageFolder(ood_path, valid_transform)
        loader = torch.utils.data.DataLoader(
            dset_, batch_size=args.batch_size, shuffle=False,
            pin_memory=True, num_workers=args.workers
        )
        ood_queues[k] = loader
    # build Network
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    supernet = Network(
        args.init_channels, CIFAR_CLASSES, args.layers,
        combine_method=args.feat_comb, is_cosine=args.is_cosine,
    )
    supernet.cuda()
    # This is to prevent supernet alpha attribute being None type.
    supernet.generate_share_alphas()
    # Load the alpha matrix saved by the earlier eval run for this folder.
    alphas_path = './results/{}/eval_out/{}/alphas.pt'.format(
        args.load_at.split('/')[2], args.folder)
    logging.info('Loading alphas at: %s' % alphas_path)
    alphas = torch.load(alphas_path)
    # NOTE(review): the last alpha column is dropped here — presumably it is
    # not an operation column; confirm against Network.get_sub_net.
    subnet = supernet.get_sub_net(alphas[:, :-1])
    logging.info(alphas)
    # Dataset-dependent weight decay (common convention in this codebase).
    if args.cifar100:
        weight_decay = 5e-4
    else:
        weight_decay = 3e-4
    optimizer = torch.optim.SGD(
        subnet.parameters(), args.learning_rate,
        momentum=args.momentum, weight_decay=weight_decay,
    )
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)
    for epoch in range(args.epochs):
        logging.info('epoch {} lr {:.4f}'.format(epoch, scheduler.get_last_lr()[0]))
        train_acc, _ = train(train_queue, subnet, criterion, optimizer)
        logging.info('train_acc {:.2f}'.format(train_acc))
        valid_acc, valid_loss = infer(valid_queue, subnet, criterion)
        # NOTE(review): `global_step` is a module-level global and is not
        # advanced here — confirm it is updated inside train()/infer().
        writer_va.add_scalar('loss', valid_loss, global_step)
        writer_va.add_scalar('acc', valid_acc, global_step)
        logging.info('valid_acc {:.2f}'.format(valid_acc))
        scheduler.step()
    if not os.path.exists(args.ckpt_path):
        os.makedirs(args.ckpt_path)
    utils.save(subnet, os.path.join(args.ckpt_path,
                                    'subnet_{}_weights.pt'.format(args.folder)))
    # OOD detection AUCs: logit-based, softmax-based, and entropy-based.
    lg_aucs, sm_aucs, ent_aucs = ood_eval(valid_queue, ood_queues, subnet, criterion)
    logging.info('Writting results:')
    out_dir = './results/{}/eval_out/{}/'.format(args.load_at.split('/')[2],
                                                 args.folder)
    with open(os.path.join(out_dir, 'subnet_scratch.txt'), 'w') as f:
        f.write('-'.join([str(valid_acc), str(lg_aucs), str(sm_aucs), str(ent_aucs)]))
def main():
    """Train a fixed genotype architecture (DataParallel, resumable).

    Builds the network from `genotypes.<args.arch>`, optionally resumes from
    `weight_optimizers.pt`, trains with cosine LR, and checkpoints both the
    best model and periodic snapshots.

    Relies on module-level globals: `args`, `unparsed`, `CIFAR_CLASSES`,
    plus project helpers (`utils`, `genotypes`, `Network`, `train`, `infer`).
    """
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    # Seed numpy and torch (CPU + CUDA) for reproducibility.
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)
    logging.info("unparsed args = %s", unparsed)
    num_gpus = torch.cuda.device_count()
    # Look up the named genotype from the project's genotypes module.
    genotype = eval("genotypes.%s" % args.arch)
    print('---------Genotype---------')
    logging.info(genotype)
    print('--------------------------')
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype)
    # Optimizer is built before DataParallel wrapping; the parameter objects
    # are shared, so resuming its state below remains valid.
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    model = torch.nn.DataParallel(model)
    model = model.cuda()
    start_epch = 0
    if args.resume:
        # Resume model weights, optimizer state, and epoch counter.
        MT = torch.load(os.path.join(args.save, 'weight_optimizers.pt'))
        model.load_state_dict(MT['net'])
        optimizer.load_state_dict(MT['optimizer'])
        start_epch = MT['epoch']
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.data_dir, train=True,
                                   download=True, transform=train_transform)
        valid_data = dset.CIFAR100(root=args.data_dir, train=False,
                                   download=True, transform=valid_transform)
    else:
        train_data = dset.CIFAR10(root=args.data_dir, train=True,
                                  download=True, transform=train_transform)
        valid_data = dset.CIFAR10(root=args.data_dir, train=False,
                                  download=True, transform=valid_transform)
    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=args.workers)
    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=args.workers)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))
    best_acc = 0.0
    for epoch in range(start_epch, args.epochs):
        # Linearly ramp drop-path probability over training. Set on both the
        # wrapped module and the DataParallel wrapper — presumably because
        # downstream code reads it from either; confirm which one is used.
        model.module.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        start_time = time.time()
        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('Train_acc: %f', train_acc)
        scheduler.step()
        # NOTE(review): get_lr() is deprecated in newer PyTorch in favor of
        # get_last_lr(); kept as-is here.
        logging.info('Epoch: %d lr %e', epoch, scheduler.get_lr()[0])
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        if valid_acc > best_acc:
            best_acc = valid_acc
            state = {
                'net': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': epoch
            }
            torch.save(state,
                       os.path.join(args.save, 'best_weight_optimizers.pt'))
        logging.info('Valid_acc: %f, best_acc: %f', valid_acc, best_acc)
        end_time = time.time()
        duration = end_time - start_time
        # Prints an estimate of the *remaining* wall-clock time in hours
        # (last epoch duration extrapolated over remaining epochs).
        print('Epoch time: %d h.' % (duration * (args.epochs - epoch) / 3600))
        # Periodic resumable checkpoint (also fires at epoch 0).
        if epoch % 50 == 0:
            state = {
                'net': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': epoch
            }
            torch.save(state, os.path.join(args.save, 'weight_optimizers.pt'))
def main():
    """Evaluate a trained model checkpoint on the appropriate test set.

    Resolves the genotype from (in priority order) a `genotype.txt` next to
    the checkpoint, a `genoname.txt` naming an entry in `genotypes`, or the
    hard-coded `genotypes.ADMM` fallback; then loads weights and reports
    test accuracy.

    Relies on module-level globals: `args`, `CIFAR_CLASSES`, plus project
    helpers (`utils`, `genotypes`, `Network`, `infer`).
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    torch.cuda.empty_cache()
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)
    # Prefer a genotype.txt stored alongside the model checkpoint.
    genotype_path = os.path.join(utils.get_dir(),
                                 os.path.split(args.model_path)[0],
                                 'genotype.txt')
    print(genotype_path)
    if os.path.isfile(genotype_path):
        with open(genotype_path, "r") as f:
            geno_raw = f.read()
        # NOTE(review): eval() on file contents — trusted-local-files
        # assumption; do not point this at untrusted directories.
        genotype = eval(geno_raw)
    else:
        # Fallback 1: a genoname.txt naming an attribute of `genotypes`.
        genoname = os.path.join(utils.get_dir(),
                                os.path.split(args.model_path)[0],
                                'genoname.txt')
        if os.path.isfile(genoname):
            with open(genoname, "r") as f:
                args.arch = f.read()
            genotype = eval("genotypes.%s" % args.arch)
        else:
            # Fallback 2: hard-coded default genotype.
            genotype = eval("genotypes.ADMM")
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype)
    model = model.cuda()
    utils.load(model, os.path.join(utils.get_dir(), args.model_path))
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    # Default CIFAR-10 test loader; may be replaced below depending on task.
    _, test_transform = utils._data_transforms_cifar10(args)
    datapath = os.path.join(utils.get_dir(), args.data)
    test_data = dset.CIFAR10(root=datapath, train=False, download=True,
                             transform=test_transform)
    test_queue = torch.utils.data.DataLoader(
        test_data, batch_size=args.batch_size, shuffle=False,
        pin_memory=True, num_workers=2)
    if args.task == "CIFAR100cf":
        # Coarse/fine-filtered CIFAR-100 variant: restrict the test set to
        # the configured fine-label filter.
        _, test_transform = utils._data_transforms_cifar100(args)
        test_data = utils.CIFAR100C2F(root=datapath, train=False,
                                      download=True, transform=test_transform)
        test_indices = test_data.filter_by_fine(args.test_filter)
        test_queue = torch.utils.data.DataLoader(
            torch.utils.data.Subset(test_data, test_indices),
            batch_size=args.batch_size, shuffle=False,
            pin_memory=True, num_workers=2)
        # TODO: extend each epoch or multiply number of epochs by 20%*args.class_filter
    else:
        if args.task == "CIFAR100":
            _, test_transform = utils._data_transforms_cifar100(args)
            test_data = dset.CIFAR100(root=datapath, train=False,
                                      download=True, transform=test_transform)
        else:
            _, test_transform = utils._data_transforms_cifar10(args)
            test_data = dset.CIFAR10(root=datapath, train=False,
                                     download=True, transform=test_transform)
        test_queue = torch.utils.data.DataLoader(
            test_data, batch_size=args.batch_size, shuffle=False,
            pin_memory=True, num_workers=2)
    # Fixed drop-path probability at evaluation time.
    model.drop_path_prob = args.drop_path_prob
    test_acc, test_obj = infer(test_queue, model, criterion)
    logging.info('test_acc %f', test_acc)
def main():
    """Train a weight-sharing supernet; periodically evaluate sampled subnets.

    Each epoch trains the supernet, then samples `args.eval_time` random
    alpha configurations, extracts the corresponding subnet for each, and
    reports the mean validation accuracy across samples.

    Relies on module-level globals: `args`, `unparsed`, `CIFAR_CLASSES`,
    plus project helpers (`utils`, `Network`, `train`, `infer`).
    """
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    # Seed numpy and torch (CPU + CUDA) for reproducibility.
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)
    logging.info("unparsed args = %s", unparsed)
    # prepare dataset (download=False: data is expected to already exist)
    if args.is_cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.is_cifar100:
        train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True,
                                   download=False, transform=train_transform)
        valid_data = dset.CIFAR100(root=args.tmp_data_dir, train=False,
                                   download=False, transform=valid_transform)
    else:
        train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True,
                                  download=False, transform=train_transform)
        valid_data = dset.CIFAR10(root=args.tmp_data_dir, train=False,
                                  download=False, transform=valid_transform)
    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=args.workers)
    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=args.workers)
    # build Network
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    supernet = Network(args.init_channels, CIFAR_CLASSES, args.layers)
    supernet.cuda()
    # Dataset-dependent weight decay (common convention in this codebase).
    if args.is_cifar100:
        weight_decay = 5e-4
    else:
        weight_decay = 3e-4
    optimizer = torch.optim.SGD(
        supernet.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=weight_decay,
    )
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)
    for epoch in range(args.epochs):
        logging.info('epoch %d lr %e', epoch, scheduler.get_last_lr()[0])
        train_acc, train_obj = train(train_queue, supernet, criterion, optimizer)
        logging.info('train_acc %f', train_acc)
        # Sample several random subnets and average their validation accuracy.
        valid_top1 = utils.AverageMeter()
        for i in range(args.eval_time):
            supernet.generate_share_alphas()
            # Alphas are shared across cells; cell 0 holds the sampled set.
            ops_alps = supernet.cells[0].ops_alphas
            subnet = supernet.get_sub_net(ops_alps)
            valid_acc, valid_obj = infer(valid_queue, subnet, criterion)
            valid_top1.update(valid_acc)
        logging.info('Mean Valid Acc: %f', valid_top1.avg)
        scheduler.step()
    utils.save(supernet, os.path.join(args.save, 'supernet_weights.pt'))
def main():
    """Evaluate a trained supernet with freshly sampled alphas; optional fine-tune.

    Loads supernet weights, samples a shared alpha matrix, records it to
    disk, evaluates accuracy and OOD AUCs ("before"), and — if requested —
    fine-tunes the whole supernet and re-evaluates ("after").

    Relies on module-level globals: `args`, `unparsed`, `CIFAR_CLASSES`,
    plus project helpers (`utils`, `Network`, `train`, `infer`, `ood_eval`).
    """
    if not torch.cuda.is_available():
        print('No GPU device available')
        sys.exit(1)
    # Seed numpy and torch (CPU + CUDA) for reproducibility.
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    print("args = %s", args)
    print("unparsed args = %s", unparsed)
    # prepare dataset
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True,
                                   download=True, transform=train_transform)
        valid_data = dset.CIFAR100(root=args.tmp_data_dir, train=False,
                                   download=True, transform=valid_transform)
    else:
        train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True,
                                  download=True, transform=train_transform)
        valid_data = dset.CIFAR10(root=args.tmp_data_dir, train=False,
                                  download=True, transform=valid_transform)
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True,
        pin_memory=True, num_workers=args.workers)
    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size, shuffle=False,
        pin_memory=True, num_workers=args.workers)
    # Out-of-distribution loaders (ImageFolder dirs, in-distribution transform).
    ood_queues = {}
    for k in ['svhn', 'lsun_resized', 'imnet_resized']:
        ood_path = os.path.join(args.ood_dir, k)
        dset_ = dset.ImageFolder(ood_path, valid_transform)
        loader = torch.utils.data.DataLoader(
            dset_, batch_size=args.batch_size, shuffle=False,
            pin_memory=True, num_workers=args.workers
        )
        ood_queues[k] = loader
    # build Network
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    supernet = Network(
        args.init_channels, CIFAR_CLASSES, args.layers,
        combine_method=args.feat_comb, is_cosine=args.is_cosine,
    )
    supernet.cuda()
    ckpt = torch.load(args.load_at)
    print(args.load_at)
    supernet.load_state_dict(ckpt)
    # Sample a fresh shared alpha matrix for this evaluation run.
    # (A dead commented-out block that hard-coded a fixed 14x3 alpha matrix
    # and assigned it to every cell was removed here.)
    supernet.generate_share_alphas()
    alphas = supernet.cells[0].ops_alphas
    print(alphas)
    # Persist the sampled alphas (tensor + human-readable 0/1 text matrix)
    # so later runs can rebuild the same subnet.
    out_dir = './results/{}/eval_out/{}'.format(args.load_at.split('/')[2],
                                                args.seed)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    torch.save(alphas, os.path.join(out_dir, 'alphas.pt'))
    with open(os.path.join(out_dir, 'alphas.txt'), 'w') as f:
        for i in alphas.cpu().detach().numpy():
            for j in i:
                f.write('{:d}'.format(int(j)))
            f.write('\n')
    # Dataset-dependent weight decay (common convention in this codebase).
    if args.cifar100:
        weight_decay = 5e-4
    else:
        weight_decay = 3e-4
    optimizer = torch.optim.SGD(
        supernet.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=weight_decay,
    )
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs), eta_min=0)
    # "Before" metrics: supernet with sampled alphas, no fine-tuning.
    valid_acc, _ = infer(valid_queue, supernet, criterion)
    print('valid_acc {:.2f}'.format(valid_acc))
    lg_aucs, sm_aucs, ent_aucs = ood_eval(valid_queue, ood_queues,
                                          supernet, criterion)
    with open(os.path.join(out_dir, 'before.txt'), 'w') as f:
        f.write('-'.join([str(valid_acc), str(lg_aucs), str(sm_aucs), str(ent_aucs)]))
    if args.fine_tune:
        for epoch in range(args.epochs):
            # scheduler.step()
            # LR is printed as a constant 0.001 (scheduler disabled above);
            # the optimizer actually uses args.learning_rate.
            print('epoch {} lr {:.4f}'.format(epoch, 0.001))
            train_acc, _ = train(train_queue, supernet, criterion, optimizer)
            print('train_acc {:.2f}'.format(train_acc))
        # "After" metrics: re-evaluate once fine-tuning completes.
        valid_acc, _ = infer(valid_queue, supernet, criterion)
        print('valid_acc {:.2f}'.format(valid_acc))
        lg_aucs, sm_aucs, ent_aucs = ood_eval(valid_queue, ood_queues,
                                              supernet, criterion)
        with open(os.path.join(out_dir, 'after.txt'), 'w') as f:
            f.write('-'.join([str(valid_acc), str(lg_aucs), str(sm_aucs), str(ent_aucs)]))
def main():
    """Evaluate a sampled subnet; optionally fine-tune whole net vs classifier.

    Samples alphas from a loaded supernet, extracts the subnet, records the
    initial validation accuracy, then (if `args.fine_tune`) compares two
    regimes: fine-tuning the entire subnet and fine-tuning only the
    classifier head. Each is evaluated with and without `use_fly_bn`.

    Relies on module-level globals: `args`, `unparsed`, `CIFAR_CLASSES`,
    plus project helpers (`utils`, `Network`, `train`, `infer`).
    """
    if not torch.cuda.is_available():
        print('No GPU device available')
        sys.exit(1)
    # Seed numpy and torch (CPU + CUDA) for reproducibility.
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    print("args = %s", args)
    print("unparsed args = %s", unparsed)
    # prepare dataset (download=False: data is expected to already exist)
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True,
                                   download=False, transform=train_transform)
        valid_data = dset.CIFAR100(root=args.tmp_data_dir, train=False,
                                   download=False, transform=valid_transform)
    else:
        train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True,
                                  download=False, transform=train_transform)
        valid_data = dset.CIFAR10(root=args.tmp_data_dir, train=False,
                                  download=False, transform=valid_transform)
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True,
        pin_memory=True, num_workers=args.workers)
    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size, shuffle=False,
        pin_memory=True, num_workers=args.workers)
    # build Network
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    supernet = Network(
        args.init_channels, CIFAR_CLASSES, args.layers
    )
    supernet.cuda()
    ckpt = torch.load(args.load_at)
    print(args.load_at)
    supernet.load_state_dict(ckpt)
    # Sample a fresh shared alpha matrix; cell 0 holds the sampled set.
    supernet.generate_share_alphas()
    alphas = supernet.cells[0].ops_alphas
    print(alphas)
    # Persist the sampled alphas (tensor + human-readable 0/1 text matrix).
    out_dir = args.save + '{}/eval_out/{}'.format(args.load_at.split('/')[2],
                                                  args.seed)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    torch.save(alphas, os.path.join(out_dir, 'alphas.pt'))
    with open(os.path.join(out_dir, 'alphas.txt'), 'w') as f:
        for i in alphas.cpu().detach().numpy():
            for j in i:
                f.write('{:d}'.format(int(j)))
            f.write('\n')
    # Getting subnet according to sample alpha
    subnet = supernet.get_sub_net(alphas)
    init_valid_acc, _ = infer(valid_queue, subnet, criterion)
    print('Initial Valid Acc {:.2f}'.format(init_valid_acc))
    if args.fine_tune:
        # Dataset-dependent weight decay (common convention in this codebase).
        if args.cifar100:
            weight_decay = 5e-4
        else:
            weight_decay = 3e-4
        # Fine tuning whole network:
        subnet = supernet.get_sub_net(alphas)
        optimizer = torch.optim.SGD(
            subnet.parameters(),
            args.finetune_lr,
            momentum=args.momentum,
            weight_decay=weight_decay,
        )
        for epoch in range(args.epochs):
            # scheduler.step()
            print('epoch {} lr {:.4f}'.format(epoch, args.finetune_lr))
            train_acc, _ = train(train_queue, subnet, criterion, optimizer)
            print('train_acc {:.2f}'.format(train_acc))
        whole_valid_acc, _ = infer(valid_queue, subnet, criterion)
        print('valid_acc after whole fine-tune {:.2f}'.format(whole_valid_acc))
        # Same network evaluated with on-the-fly BN disabled.
        fly_whole_valid_acc, _ = infer(valid_queue, subnet, criterion,
                                       use_fly_bn=False)
        print('valid_acc after whole fine-tune {:.2f}'.format(fly_whole_valid_acc))
        # Fine-tuning only classifier:
        # Re-extract a fresh subnet so whole-net fine-tuning above does not
        # leak into this experiment — NOTE(review): assumes get_sub_net
        # returns weights reset to the loaded checkpoint; confirm.
        subnet = supernet.get_sub_net(alphas)
        # Freezing other weights except classifier:
        for name, param in subnet.named_parameters():
            if not 'classifier' in name:
                param.requires_grad_(requires_grad=False)
        optimizer = torch.optim.SGD(
            subnet.classifier.parameters(),
            args.finetune_lr,
            momentum=args.momentum,
            weight_decay=weight_decay,
        )
        for epoch in range(args.epochs):
            # scheduler.step()
            print('epoch {} lr {:.4f}'.format(epoch, args.finetune_lr))
            train_acc, _ = train(train_queue, subnet, criterion, optimizer)
            print('train_acc {:.2f}'.format(train_acc))
        part_valid_acc, _ = infer(valid_queue, subnet, criterion)
        print('valid_acc after fine-tuning classifier {:.2f}'.format(part_valid_acc))
        fly_part_valid_acc, _ = infer(valid_queue, subnet, criterion,
                                      use_fly_bn=False)
        print('valid_acc after fine-tuning classifier {:.2f}'.format(fly_part_valid_acc))
        with open(os.path.join(out_dir, 'results.txt'), 'w') as f:
            f.write('-'.join([str(init_valid_acc), str(whole_valid_acc),
                              str(fly_whole_valid_acc), str(part_valid_acc),
                              str(fly_part_valid_acc)]))
    if not args.fine_tune:
        with open(os.path.join(out_dir, 'results.txt'), 'w') as f:
            f.write(str(init_valid_acc))
def main():
    """Run differentiable architecture search with FI/threshold scheduling.

    Supports CPU (`args.gpu == -1`) or a single GPU. Builds train/valid
    loaders for one of several CIFAR task variants, then alternates weight
    and architecture updates each epoch via `train(...)`/`Architect`,
    logging losses, alpha histories, and Fisher-information traces, and
    writing the final genotype to `genotype.txt`.

    Relies on module-level globals: `args`, `CIFAR_CLASSES`, plus project
    helpers (`utils`, `Network`, `Architect`, `train`, `infer`).
    """
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpu != -1:
        if not torch.cuda.is_available():
            logging.info('no gpu device available')
            sys.exit(1)
        torch.cuda.set_device(args.gpu)
        cudnn.benchmark = True
        cudnn.enabled = True
        torch.cuda.manual_seed(args.seed)
        logging.info('gpu device = %d' % args.gpu)
    else:
        logging.info('using cpu')
    if args.dyno_schedule:
        # Derive the threshold divider so that divider and multiplier are
        # consistent with the configured schedule frequency.
        args.threshold_divider = np.exp(-np.log(args.threshold_multiplier)
                                        * args.schedfreq)
        print(
            args.threshold_divider,
            -np.log(args.threshold_multiplier) / np.log(args.threshold_divider))
    if args.dyno_split:
        # Derive the train/valid split ratio from the schedule frequency.
        args.train_portion = 1 - 1 / (1 + args.schedfreq)
    logging.info("args = %s", args)
    criterion = nn.CrossEntropyLoss()
    if args.gpu != -1:
        criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion,
                    args.rho, args.crb, args.epochs, args.gpu,
                    ewma=args.ewma, reg=args.reg)
    if args.gpu != -1:
        model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    datapath = os.path.join(utils.get_dir(), args.data)
    if args.task == "CIFAR100cf":
        # Coarse/fine-filtered CIFAR-100: filter both halves of the portion
        # split by fine label, and use FillingSubsetRandomSampler to keep
        # epoch sizes equal to the unfiltered split sizes.
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
        train_data = utils.CIFAR100C2F(root=datapath, train=True,
                                       download=True, transform=train_transform)
        num_train = len(train_data)
        indices = list(range(num_train))
        split = int(np.floor(args.train_portion * len(indices)))
        orig_num_train = len(indices[:split])
        orig_num_valid = len(indices[split:num_train])
        train_indices = train_data.filter_by_fine(args.train_filter,
                                                  indices[:split])
        valid_indices = train_data.filter_by_fine(args.valid_filter,
                                                  indices[split:num_train])
        train_queue = torch.utils.data.DataLoader(
            train_data, batch_size=args.batch_size,
            sampler=utils.FillingSubsetRandomSampler(train_indices,
                                                     orig_num_train,
                                                     reshuffle=True),
            pin_memory=True, num_workers=2)
        valid_queue = torch.utils.data.DataLoader(
            train_data, batch_size=args.batch_size,
            sampler=utils.FillingSubsetRandomSampler(valid_indices,
                                                     orig_num_valid,
                                                     reshuffle=True),
            pin_memory=True, num_workers=2)
        # TODO: extend each epoch or multiply number of epochs by 20%*args.class_filter
    elif args.task == "CIFAR100split":
        # CIFAR-100 with either a stratified split (train_data.split) or a
        # plain even index split.
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
        train_data = utils.CIFAR100C2F(root=datapath, train=True,
                                       download=True, transform=train_transform)
        if not args.evensplit:
            train_indices, valid_indices = train_data.split(args.train_portion)
        else:
            num_train = len(train_data)
            indices = list(range(num_train))
            split = int(np.floor(args.train_portion * num_train))
            train_indices = indices[:split]
            valid_indices = indices[split:num_train]
        train_queue = torch.utils.data.DataLoader(
            train_data, batch_size=args.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(train_indices),
            pin_memory=True, num_workers=2)
        valid_queue = torch.utils.data.DataLoader(
            train_data, batch_size=args.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_indices),
            pin_memory=True, num_workers=2)
    else:
        # Standard CIFAR-100/CIFAR-10 portion split of the training set.
        if args.task == "CIFAR100":
            train_transform, valid_transform = utils._data_transforms_cifar100(args)
            train_data = dset.CIFAR100(root=datapath, train=True,
                                       download=True, transform=train_transform)
        else:
            train_transform, valid_transform = utils._data_transforms_cifar10(args)
            train_data = dset.CIFAR10(root=datapath, train=True,
                                      download=True, transform=train_transform)
        num_train = len(train_data)
        indices = list(range(num_train))
        split = int(np.floor(args.train_portion * num_train))
        train_indices = indices[:split]
        valid_indices = indices[split:num_train]
        train_queue = torch.utils.data.DataLoader(
            train_data, batch_size=args.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(train_indices),
            pin_memory=True, num_workers=4)
        valid_queue = torch.utils.data.DataLoader(
            train_data, batch_size=args.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_indices),
            pin_memory=True, num_workers=4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, int(args.epochs), eta_min=args.learning_rate_min)
    architect = Architect(model, args)
    # Accumulators for loss/accuracy curves, alpha thresholds, and step marks.
    loggers = {
        "train": {"loss": [], "acc": [], "step": []},
        "val": {"loss": [], "acc": [], "step": []},
        "infer": {"loss": [], "acc": [], "step": []},
        "ath": {"threshold": [], "step": []},
        "astep": [],
        "zustep": []
    }
    alpha_threshold = args.init_alpha_threshold
    alpha_counter = 0
    ewma = -1
    for epoch in range(args.epochs):
        # NOTE(review): scheduler.step() before train() each epoch — newer
        # PyTorch expects it after optimizer.step(); kept as-is.
        scheduler.step()
        lr = scheduler.get_last_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)
        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        # Periodic genotype checkpoint into save/genotypes/<epoch>/.
        if args.ckpt_interval > 0 and epoch > 0 and (
                epoch) % args.ckpt_interval == 0:
            logging.info('checkpointing genotype')
            os.mkdir(os.path.join(args.save, 'genotypes', str(epoch)))
            with open(
                    os.path.join(args.save, 'genotypes', str(epoch),
                                 'genotype.txt'), "w") as f:
                f.write(str(genotype))
        print(model.activate(model.alphas_normal))
        print(model.activate(model.alphas_reduce))
        # training (weight + architecture steps; threshold state threads through)
        train_acc, train_obj, alpha_threshold, alpha_counter, ewma = train(
            train_queue, valid_queue, model, architect, criterion, optimizer,
            loggers, alpha_threshold, alpha_counter, ewma, args)
        logging.info('train_acc %f', train_acc)
        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        utils.log_loss(loggers["infer"], valid_obj, valid_acc, model.clock)
        logging.info('valid_acc %f', valid_acc)
        # Persist curves, alpha histories, and FI traces every epoch.
        utils.plot_loss_acc(loggers, args.save)
        utils.save_file(recoder=model.alphas_normal_history,
                        path=os.path.join(args.save, 'Normalalpha'),
                        steps=loggers["train"]["step"])
        utils.save_file(recoder=model.alphas_reduce_history,
                        path=os.path.join(args.save, 'Reducealpha'),
                        steps=loggers["train"]["step"])
        utils.plot_FI(loggers["train"]["step"], model.FI_history, args.save,
                      "FI", loggers["ath"], loggers['astep'])
        utils.plot_FI(loggers["train"]["step"], model.FI_ewma_history,
                      args.save, "FI_ewma", loggers["ath"], loggers['astep'])
        utils.save(model, os.path.join(args.save, 'weights.pt'))
    # Final genotype of the searched architecture.
    genotype = model.genotype()
    logging.info('genotype = %s', genotype)
    f = open(os.path.join(args.save, 'genotype.txt'), "w")
    f.write(str(genotype))
    f.close()
def main():
    """Train a network from a searched or named genotype; optionally self-test.

    Reads the genotype from `genotype.txt` under `args.genotype_path` when
    present, otherwise from `genotypes.<args.arch>`; trains with cosine LR
    and drop-path ramp-up, saving weights each epoch. When `args.test` is
    set, spawns `src/test.py` on the saved weights.

    Relies on module-level globals: `args`, `CIFAR_CLASSES`, plus project
    helpers (`utils`, `genotypes`, `Network`, `train`, `infer`).
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)
    # Prefer a genotype.txt produced by a search run; fall back to a named
    # genotype. NOTE(review): eval() on file contents — trusted-local-files
    # assumption; do not point this at untrusted directories.
    genotype_path = os.path.join(utils.get_dir(), args.genotype_path,
                                 'genotype.txt')
    if os.path.isfile(genotype_path):
        with open(genotype_path, "r") as f:
            geno_raw = f.read()
        genotype = eval(geno_raw)
    else:
        genotype = eval("genotypes.%s" % args.arch)
    # Record the genotype actually used alongside this run's outputs.
    f = open(os.path.join(args.save, 'genotype.txt'), "w")
    f.write(str(genotype))
    f.close()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    datapath = os.path.join(utils.get_dir(), args.data)
    if args.task == "CIFAR100cf":
        # Coarse/fine-filtered CIFAR-100: restrict train and valid sets to
        # the configured fine-label filter.
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
        train_data = utils.CIFAR100C2F(root=datapath, train=True,
                                       download=True, transform=train_transform)
        valid_data = utils.CIFAR100C2F(root=datapath, train=False,
                                       download=True, transform=valid_transform)
        train_indices = train_data.filter_by_fine(args.eval_filter)
        valid_indices = valid_data.filter_by_fine(args.eval_filter)
        train_queue = torch.utils.data.DataLoader(
            train_data, batch_size=args.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(train_indices),
            pin_memory=True, num_workers=2)
        valid_queue = torch.utils.data.DataLoader(
            torch.utils.data.Subset(valid_data, valid_indices),
            batch_size=args.batch_size, shuffle=False,
            pin_memory=True, num_workers=2)
        # TODO: extend each epoch or multiply number of epochs by 20%*args.class_filter
    else:
        if args.task == "CIFAR100":
            train_transform, valid_transform = utils._data_transforms_cifar100(args)
            train_data = dset.CIFAR100(root=datapath, train=True,
                                       download=True, transform=train_transform)
            valid_data = dset.CIFAR100(root=datapath, train=False,
                                       download=True, transform=valid_transform)
        else:
            train_transform, valid_transform = utils._data_transforms_cifar10(args)
            train_data = dset.CIFAR10(root=datapath, train=True,
                                      download=True, transform=train_transform)
            valid_data = dset.CIFAR10(root=datapath, train=False,
                                      download=True, transform=valid_transform)
        train_queue = torch.utils.data.DataLoader(train_data,
                                                  batch_size=args.batch_size,
                                                  shuffle=True,
                                                  pin_memory=True,
                                                  num_workers=2)
        valid_queue = torch.utils.data.DataLoader(valid_data,
                                                  batch_size=args.batch_size,
                                                  shuffle=False,
                                                  pin_memory=True,
                                                  num_workers=2)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.epochs)
    for epoch in range(args.epochs):
        # NOTE(review): scheduler.step() before train() each epoch — newer
        # PyTorch expects it after optimizer.step(); kept as-is.
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_last_lr()[0])
        # Linearly ramp drop-path probability over training.
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc %f', train_acc)
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)
        utils.save(model, os.path.join(args.save, 'weights.pt'))
    if args.test:
        torch.cuda.empty_cache()
        # Spawn the test script on the weights just saved.
        os.system(
            'python src/test.py --batch_size 8 --auxiliary --model_path %s --task %s --test_filter %s'
            % (os.path.join(args.save, 'weights.pt'), args.task,
               args.test_filter))