def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    # loss function
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # optimizer
    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() number_of_classes = class_dict[args.dataset] in_channels = inp_channel_dict[args.dataset] model = Network(args.init_channels, number_of_classes, args.layers, criterion, in_channels) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) # Get transforms to apply on data train_transform, valid_transform = utils.get_data_transforms(args) # Get the training queue train_queue, valid_queue = get_training_queues(args, train_transform) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) for epoch in range(args.epochs): scheduler.step() lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) genotype = model.genotype() logging.info('genotype = %s', genotype) print(F.softmax(model.alphas_normal, dim=-1)) print(F.softmax(model.alphas_reduce, dim=-1)) # training train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) utils.save(model, os.path.join(args.save, 'weights.pt'))
def main():
    # if not torch.cuda.is_available():
    #     logging.info('no gpu device available')
    #     sys.exit(1)

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # logging.info('gpu device = %d' % args.gpu)
    # logging.info("args = %s", args)

    criterion = nn.MSELoss()
    # criterion = criterion.cuda()
    model = Network(args.network_inputsize, args.network_outputsize,
                    args.max_width, args.max_depth, criterion)
    # model = model.cuda()

    optimizer = torch.optim.Adam(
        model.parameters(),
        args.learning_rate,
        # momentum=args.momentum,
        weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    plt.ion()
    for epoch in range(args.epochs):
        lr = scheduler.get_lr()[0]
        # lr = args.learning_rate
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        print(F.softmax(model.w_alpha, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr,
                                     epoch)
        logging.info('train_acc %f', train_acc)
        scheduler.step()

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion, epoch)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

        plt.draw()
        plt.pause(0.1)
    plt.ioff()
    plt.show()
def main(args):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    logging.info("args = %s", args)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    train_loader, valid_loader = utils.search_dataloader(args, kwargs)

    criterion = nn.CrossEntropyLoss().to(device)
    model = Network(device, nodes=2).to(device)
    logging.info(
        "param size = %fMB",
        np.sum(np.prod(v.size()) for name, v in model.named_parameters()) / 1e6)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=0.9,
                                weight_decay=args.weight_decay)
    architect = Architect(model)

    for epoch in range(args.epochs):
        logging.info("Starting epoch %d/%d", epoch + 1, args.epochs)

        # training
        train_acc, train_obj = train(train_loader, valid_loader, model,
                                     architect, criterion, optimizer, device)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_loader, model, criterion, device)
        logging.info('valid_acc %f', valid_acc)

        # compute the discrete architecture from the current alphas
        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        with open(args.save + '/architecture', 'w') as f:
            f.write(str(genotype))
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10_simple( args) train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:num_train]), pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) for epoch in range(args.epochs): scheduler.step() lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) genotype = model.genotype() logging.info('genotype = %s', genotype) print(F.softmax(model.alphas_normal, dim=-1)) print(F.softmax(model.alphas_reduce, dim=-1)) # training train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) # adversarial testing adv_acc, adv_obj = infer_minibatch(valid_queue, model, criterion) logging.info('adv_acc %f', adv_acc) #infer_minibatch(valid_queue, model, criterion) utils.save(model, os.path.join(args.save, 'weights_' + str(epoch) + '.pt'))
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    if args.random_seed:
        args.seed = np.random.randint(0, 1000, 1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    # dataset modelnet
    pre_transform, transform = T.NormalizeScale(), T.SamplePoints(args.num_points)
    train_dataset = GeoData.ModelNet(os.path.join(args.data, 'modelnet10'),
                                     '10', True, transform, pre_transform)
    train_queue = DenseDataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.batch_size // 2)
    test_dataset = GeoData.ModelNet(os.path.join(args.data, 'modelnet10'),
                                    '10', False, transform, pre_transform)
    valid_queue = DenseDataLoader(test_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.batch_size // 2)
    n_classes = train_queue.dataset.num_classes

    criterion = torch.nn.CrossEntropyLoss().cuda()
    model = Network(args.init_channels, n_classes, args.num_cells, criterion,
                    args.n_steps, in_channels=args.in_channels,
                    emb_dims=args.emb_dims, dropout=args.dropout,
                    k=args.k).cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    num_edges = model._steps * 2
    post_train = 5
    # import pdb; pdb.set_trace()
    args.epochs = args.warmup_dec_epoch + args.decision_freq * (num_edges - 1) \
        + post_train + 1
    logging.info("total epochs: %d", args.epochs)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    normal_selected_idxs = torch.tensor(len(model.alphas_normal) * [-1],
                                        requires_grad=False,
                                        dtype=torch.int).cuda()
    normal_candidate_flags = torch.tensor(len(model.alphas_normal) * [True],
                                          requires_grad=False,
                                          dtype=torch.bool).cuda()
    logging.info('normal_selected_idxs: {}'.format(normal_selected_idxs))
    logging.info('normal_candidate_flags: {}'.format(normal_candidate_flags))
    model.normal_selected_idxs = normal_selected_idxs
    model.normal_candidate_flags = normal_candidate_flags

    print(F.softmax(torch.stack(model.alphas_normal, dim=0), dim=-1).detach())

    count = 0
    normal_probs_history = []
    train_losses, valid_losses = utils.AverageMeter(), utils.AverageMeter()

    for epoch in range(args.epochs):
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        # training
        # import pdb; pdb.set_trace()
        att = model.show_att()
        beta = model.show_beta()
        train_acc, train_losses = train(train_queue, valid_queue, model,
                                        architect, criterion, optimizer, lr,
                                        train_losses)
        valid_overall_acc, valid_class_acc, valid_losses = infer(
            valid_queue, model, criterion, valid_losses)
        logging.info('train_acc %f\tvalid_overall_acc %f \t valid_class_acc %f',
                     train_acc, valid_overall_acc, valid_class_acc)
        logging.info('beta %s', beta.cpu().detach().numpy())
        logging.info('att %s', att.cpu().detach().numpy())

        # make edge decisions
        saved_memory_normal, model.normal_selected_idxs, \
            model.normal_candidate_flags = edge_decision(
                'normal',
                model.alphas_normal,
                model.normal_selected_idxs,
                model.normal_candidate_flags,
                normal_probs_history,
                epoch,
                model,
                args)

        if saved_memory_normal:
            del train_queue, valid_queue
            torch.cuda.empty_cache()

            count += 1
            new_batch_size = args.batch_size + args.batch_increase * count
            logging.info("new_batch_size = {}".format(new_batch_size))
            train_queue = DenseDataLoader(train_dataset,
                                          batch_size=new_batch_size,
                                          shuffle=True,
                                          num_workers=args.batch_size // 2)
            valid_queue = DenseDataLoader(test_dataset,
                                          batch_size=new_batch_size,
                                          shuffle=False,
                                          num_workers=args.batch_size // 2)

            # post validation
            if args.post_val:
                post_valid_overall_acc, post_valid_class_acc, valid_losses = infer(
                    valid_queue, model, criterion, valid_losses)
                logging.info('post_valid_overall_acc %f', post_valid_overall_acc)

        writer.add_scalar('stats/train_acc', train_acc, epoch)
        writer.add_scalar('stats/valid_overall_acc', valid_overall_acc, epoch)
        writer.add_scalar('stats/valid_class_acc', valid_class_acc, epoch)
        utils.save(model, os.path.join(args.save, 'weights.pt'))
        scheduler.step()

    logging.info("#" * 30 + " Done " + "#" * 30)
    logging.info('genotype = %s', model.get_genotype())
def nas(args: Namespace, task: Task, preprocess_func: Compose) -> Module:
    '''Network Architecture Search method.

    Given a task and a preprocess function, this method returns a model
    found by NAS. The implementation of DARTS is available at
    https://github.com/alphadl/darts.pytorch1.1
    '''
    # TODO: Replace model with the output by NAS
    args.save = 'search-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S"))
    utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format=log_format, datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    CLASSES = task.n_classes

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    # torch.cuda.set_device(args.gpu)
    # gpus = [int(args.gpu)]
    gpus = [int(i) for i in args.gpu.split(',')]
    if len(gpus) == 1:
        torch.cuda.set_device(int(args.gpu))
    # cudnn.benchmark = True
    torch.manual_seed(args.seed)
    # cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %s' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CLASSES, args.layers, criterion)
    model = model.cuda()
    if len(gpus) > 1:
        print("True")
        model = nn.parallel.DataParallel(model, device_ids=gpus,
                                         output_device=gpus[0])
        model = model.module

    arch_params = list(map(id, model.arch_parameters()))
    weight_params = filter(lambda p: id(p) not in arch_params,
                           model.parameters())
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        # model.parameters(),
        weight_params,
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)
    optimizer = nn.DataParallel(optimizer, device_ids=gpus)

    if task.name == 'cifar100':
        train_data = dset.CIFAR100(root=args.data, train=True, download=True,
                                   transform=preprocess_func)
        # train_transform, valid_transform = utils._data_transforms_cifar10(args)
        # train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
    elif task.name == 'cifar10':
        train_data = dset.CIFAR10(root=args.data, train=True, download=True,
                                  transform=preprocess_func)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer.module, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, criterion, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(args, train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        with torch.no_grad():
            valid_acc, valid_obj = infer(args, valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

    # return a neural network model (torch.nn.Module)
    genotype = model.genotype()
    logging.info('genotype = %s', genotype)
    model = NetworkClassification(36, task.n_classes, 20, False, genotype)
    return model
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() KD_loss = kd_loss.KDLoss(args.temp) model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, KD_loss) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) if not args.cls: print('not cls') trainloader, infer_pair_loader, infer_random_loader, valloader = dataset.load_dataset( args.dataset, args.dataroot, batch_size=args.batch_size) else: trainloader, infer_pair_loader, infer_random_loader, valloader = dataset.load_dataset( args.dataset, args.dataroot, 'pair', batch_size=args.batch_size) print(len(trainloader)) print(len(infer_pair_loader)) print(len(valloader)) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) for epoch in range(args.epochs): scheduler.step() lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) genotype = model.genotype() logging.info('genotype = %s', genotype) print(F.softmax(model.alphas_normal, dim=-1)) print(F.softmax(model.alphas_reduce, dim=-1)) # training train_acc, train_obj = train(trainloader, infer_pair_loader, model, architect, criterion, KD_loss, optimizer, lr) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(infer_random_loader, model, criterion) logging.info('valid_acc %f', valid_acc) utils.save(model, os.path.join(args.save, 'weights.pt'))
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, k=args.k) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) if args.dataset == 'cifar100': train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform) else: train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:num_train]), pin_memory=True) architect = Architect(model, args) # configure progressive parameter epoch = 0 ks = [6, 4] num_keeps = [7, 4] train_epochs = [2, 2] if 'debug' in args.save else [25, 25] scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(sum(train_epochs)), eta_min=args.learning_rate_min) for i, current_epochs in enumerate(train_epochs): for e in range(current_epochs): lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) genotype = model.genotype() logging.info('genotype = %s', genotype) model.show_arch_parameters() # training train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, e) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) epoch += 1 scheduler.step() utils.save(model, os.path.join(args.save, 'weights.pt')) if not i == len(train_epochs) - 1: model.pruning(num_keeps[i + 1]) # architect.pruning([model.mask_normal, model.mask_reduce]) model.wider(ks[i + 1]) optimizer = configure_optimizer( optimizer, torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay)) scheduler = configure_scheduler( scheduler, torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(sum(train_epochs)), eta_min=args.learning_rate_min)) logging.info('pruning finish, %d ops left per edge', num_keeps[i + 1]) logging.info('network wider finish, current pc parameter %d', ks[i + 1]) genotype = model.genotype() logging.info('genotype = %s', genotype) model.show_arch_parameters()
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, args.n_class, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    # balanced split to train/validation
    _, _, n_classes, train_data, val_dat, test_dat = utils2.get_data(
        "custom", args.train_data_path, args.val_data_path,
        args.test_data_path, cutout_length=0, validation=True,
        validation2=True, n_class=args.n_class, image_size=args.image_size)
    print(train_data)

    # split data to train/validation
    num_train = len(train_data)
    n_val = len(val_dat)
    n_test = len(test_dat)
    indices1 = list(range(num_train))
    indices2 = list(range(n_val))
    indices3 = list(range(n_test))
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices1)
    valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices2)
    test_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices3)

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              sampler=train_sampler,
                                              num_workers=2,
                                              pin_memory=True)
    valid_queue = torch.utils.data.DataLoader(val_dat,
                                              batch_size=args.batch_size,
                                              sampler=valid_sampler,
                                              num_workers=2,
                                              pin_memory=True)
    test_queue = torch.utils.data.DataLoader(test_dat,
                                             batch_size=args.batch_size,
                                             sampler=test_sampler,
                                             num_workers=2,
                                             pin_memory=True)

    """
    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.set == 'cifar100':
        train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
    else:
        train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)
    """

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    bestMetric = -999
    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        # print(F.softmax(model.alphas_normal, dim=-1))
        # print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr,
                                     epoch)
        logging.info('train_acc %f', train_acc)

        # validation
        # if args.epochs - epoch <= 1:
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        test_acc, test_obj = infer(test_queue, model, criterion)
        logging.info('test_acc %f', test_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

        if valid_acc > bestMetric:
            bestMetric = valid_acc
            utils.save(model, os.path.join(args.save, 'best_weights.pt'))
def main():
    # check gpu is available
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    # init
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    # criterion, model, optimizer for model training
    criterion = nn.CrossEntropyLoss()  # TODO: add latency loss
    criterion = criterion.cuda()
    model = Network(channels, steps, strides, CLASSES, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # prepare datasets
    # train_transform, valid_transform = utils._data_transforms_cifar10(args)
    # train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    train_transform, valid_transform = utils._data_transforms_imagenet(args)
    train_data = dset.ImageNet(root=args.data, split='train',
                               download=True, transform=train_transform)
    valid_data = dset.ImageNet(root=args.data, split='val',
                               download=True, transform=valid_transform)

    num_train = len(train_data)
    # indices = list(range(num_train))
    # split = int(np.floor(args.train_portion * num_train))

    # create dataloader
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        # sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size,
        # sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    # learning rate scheduler with cosine annealing
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    # architect
    architect = Architect(model, args)

    # training
    for epoch in range(args.epochs):
        # lr update
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        # get genotype for logging
        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        for alpha in model.arch_parameters():
            print(F.softmax(alpha, dim=-1).data)

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) model = Network() model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum) optimizer_enhance = torch.optim.SGD(model.enhance_net_parameters(), args.learning_rate, momentum=args.momentum) optimizer_denoise = torch.optim.SGD(model.denoise_net_parameters(), args.learning_rate, momentum=args.momentum) # prepare DataLoader train_low_data_names = r'D:\ZJA\data\LOL\trainA/*.png' # train_low_data_names = r'H:\image-enhance\UPE500\trainA/*.png' TrainDataset = MemoryFriendlyLoader(img_dir=train_low_data_names, task='train') valid_low_data_names = r'D:\ZJA\data\LOL\validA/*.png' # valid_low_data_names = r'H:\image-enhance\UPE500\validA/*.png' ValidDataset = MemoryFriendlyLoader(img_dir=valid_low_data_names, task='valid') train_queue = torch.utils.data.DataLoader(TrainDataset, batch_size=args.batch_size, pin_memory=True, num_workers=0) valid_queue = torch.utils.data.DataLoader(ValidDataset, batch_size=args.batch_size, pin_memory=True, num_workers=0) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) enhance_architect = Enhence_Architect(model, args) denoise_architect = Denoise_Architect(model, args) for epoch in range(args.epochs): scheduler.step() lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) logging.info('Architect of IEM:') logging.info('iem = %s', str(0)) genotype = model.genotype(0, task='enhance') logging.info('genotype = %s', genotype) logging.info('iem %s', str(0)) logging.info('%s', F.softmax(model.alphas_enhances[0], dim=-1)) logging.info('Architect of NRM:') logging.info('nrm = %s', str(0)) genotype = model.genotype(0, task='denoise') logging.info('genotype = %s', genotype) logging.info('nrm %s', str(0)) logging.info('%s', F.softmax(model.alphas_denoises[0], dim=-1)) # training train(train_queue, valid_queue, model, enhance_architect, denoise_architect, optimizer_enhance, optimizer_denoise, lr, epoch)
def darts(exp_name, args):
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    args['save'] = './{}/{}-{}-{}'.format(exp_name, args['save'],
                                          time.strftime("%Y%m%d-%H%M%S"),
                                          args['seed'])
    utils.create_exp_dir(args['save'], scripts_to_save=glob.glob('*.py'))

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format=log_format, datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(args['save'], 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    np.random.seed(args['seed'])
    torch.cuda.set_device(args['gpu'])
    cudnn.benchmark = True
    torch.manual_seed(args['seed'])
    cudnn.enabled = True
    torch.cuda.manual_seed(args['seed'])
    logging.info('gpu device = %s' % args['gpu'])
    logging.info("args = %s", args)

    data_augmentations = transforms.ToTensor()
    train_data = KMNIST(args['data'], True, data_augmentations)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args['init_channels'], train_data.n_classes,
                    args['layers'], criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args['learning_rate'],
                                momentum=args['momentum'],
                                weight_decay=args['weight_decay'])

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args['train_portion'] * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args['batch_size'],
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]))
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args['batch_size'],
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]))

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args['epochs']), eta_min=args['learning_rate_min'])

    architect = Architect(model, args)

    for epoch in range(args['epochs']):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))
        print(F.softmax(model.betas_normal[2:5], dim=-1))
        # model.drop_path_prob = args['drop_path_prob'] * epoch / args['epochs']

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr,
                                     epoch)
        logging.info('train_acc %f', train_acc)

        # validation
        if args['epochs'] - epoch <= 1:
            valid_acc, valid_obj = infer(valid_queue, model, criterion)
            logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args['save'], 'weights.pt'))
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    if args.regularize_type not in ["", "dirichlet", "gumball"]:
        logging.info('regularization type set incorrectly')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    # reg coef and alpha
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    criterion, 0.05, 1)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True,
                              transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True)
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        alphas_normal = F.softmax(model.alphas_normal, dim=-1)
        alphas_reduce = F.softmax(model.alphas_reduce, dim=-1)
        logging.info(alphas_normal)
        logging.info(alphas_reduce)

        # exp4 alternation: epoch even = 1.0, not even = 10e-30
        cur_temp = None
        if epoch % 2 == 0:
            cur_temp = 1.0
        else:
            cur_temp = 10e-10
        model.set_temperature(cur_temp)
        logging.info('starting with temperature %f', cur_temp)

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr)
        # logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        # logging.info('valid_acc %f', valid_acc)

        model_epoch_dir = (exp_dir + "/epoch{epoch_num}/").format(epoch_num=epoch)
        if not os.path.exists(model_epoch_dir):
            os.makedirs(model_epoch_dir)
        utils.save(model, os.path.join(model_epoch_dir, 'weights.pt'))
def search(self, train_x, train_y, valid_x, valid_y, metadata):
    np.random.seed(self.seed)
    cudnn.benchmark = True
    torch.manual_seed(self.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(self.seed)

    is_multi_gpu = False
    helper_function()

    n_classes = metadata['n_classes']

    # check torch available
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    cudnn.benchmark = True
    cudnn.enabled = True

    # loading criterion
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    train_pack = list(zip(train_x, train_y))
    valid_pack = list(zip(valid_x, valid_y))
    data_channel = np.array(train_x).shape[1]
    train_loader = torch.utils.data.DataLoader(train_pack,
                                               int(self.batch_size),
                                               pin_memory=True,
                                               num_workers=4)
    valid_loader = torch.utils.data.DataLoader(valid_pack,
                                               int(self.batch_size),
                                               pin_memory=True,
                                               num_workers=4)

    model = Network(self.init_channels, data_channel, n_classes,
                    self.layers, criterion)
    model = model.cuda()

    # since submission server does not deal with multi-gpu
    if is_multi_gpu:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
        model = nn.DataParallel(model)

    arch_parameters = model.module.arch_parameters() if is_multi_gpu \
        else model.arch_parameters()
    arch_params = list(map(id, arch_parameters))
    parameters = model.module.parameters() if is_multi_gpu \
        else model.parameters()
    weight_params = filter(lambda p: id(p) not in arch_params, parameters)

    optimizer = torch.optim.SGD(weight_params,
                                self.learning_rate,
                                momentum=self.momentum,
                                weight_decay=self.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(self.epochs), eta_min=self.learning_rate_min)

    architect = Architect(is_multi_gpu, model, criterion, self.momentum,
                          self.weight_decay, self.arch_learning_rate,
                          self.arch_weight_decay)

    best_accuracy = 0
    best_accuracy_different_cnn_counts = dict()

    for epoch in range(self.epochs):
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        # training
        objs = utils.AvgrageMeter()
        top1 = utils.AvgrageMeter()
        top5 = utils.AvgrageMeter()
        train_batch = time.time()
        for step, (input, target) in enumerate(train_loader):
            # logging.info("epoch %d, step %d START" % (epoch, step))
            model.train()
            n = input.size(0)

            input = input.cuda()
            target = target.cuda()

            # get a random minibatch from the search queue with replacement
            input_search, target_search = next(iter(valid_loader))
            input_search = input_search.cuda()
            target_search = target_search.cuda()

            # Update architecture alpha by Adam-SGD
            # logging.info("step %d. update architecture by Adam. START" % step)
            # if args.optimization == "DARTS":
            #     architect.step(input, target, input_search, target_search, lr, optimizer, unrolled=args.unrolled)
            # else:
            architect.step_milenas_2ndorder(input, target, input_search,
                                            target_search, lr, optimizer, 1, 1)
            # logging.info("step %d. update architecture by Adam. FINISH" % step)

            # Update weights w by SGD, ignore the weights that gained during
            # architecture training
            # logging.info("step %d. update weight by SGD. START" % step)
            optimizer.zero_grad()
            logits = model(input)
            loss = criterion(logits, target)
            loss.backward()
            parameters = model.module.arch_parameters() if is_multi_gpu \
                else model.arch_parameters()
            nn.utils.clip_grad_norm_(parameters, self.grad_clip)
            optimizer.step()
            # logging.info("step %d. update weight by SGD. FINISH\n" % step)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)
            # torch.cuda.empty_cache()

            if step % self.report_freq == 0:
                average_batch_t = (time.time() - train_batch) / (step + 1)
                print("Epoch: {}, Step: {}, Top1: {}, Top5: {}, T: {}".format(
                    epoch, step, top1.avg, top5.avg,
                    show_time(average_batch_t * (len(train_loader) - step))))

        model.eval()

        # validation
        with torch.no_grad():
            objs = utils.AvgrageMeter()
            top1 = utils.AvgrageMeter()
            top5 = utils.AvgrageMeter()
            for step, (input, target) in enumerate(valid_loader):
                input = input.cuda()
                target = target.cuda()

                logits = model(input)
                loss = criterion(logits, target)

                prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
                n = input.size(0)
                objs.update(loss.item(), n)
                top1.update(prec1.item(), n)
                top5.update(prec5.item(), n)

                if step % self.report_freq == 0:
                    print("Epoch: {}, Step: {}, Top1: {}, Top5: {}".format(
                        epoch, step, top1.avg, top5.avg))

        scheduler.step()

    # save the structure
    genotype, normal_cnn_count, reduce_cnn_count = model.module.genotype() \
        if is_multi_gpu else model.genotype()
    print("(n:%d,r:%d)" % (normal_cnn_count, reduce_cnn_count))
    # print(F.softmax(model.module.alphas_normal if is_multi_gpu else model.alphas_normal, dim=-1))
    # print(F.softmax(model.module.alphas_reduce if is_multi_gpu else model.alphas_reduce, dim=-1))
    # logging.info('genotype = %s', genotype)
    return model
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, args.eta_min, args.reg_flops, args.mu) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer_alpha = torch.optim.SGD( model.arch_parameters(), args.learning_rate_alpha, momentum=args.momentum, weight_decay=args.weight_decay_alpha) optimizer_omega = torch.optim.SGD( model.parameters(), args.learning_rate_omega, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2) epoch = 0 flops_lambda = 0 flops_lambda_delta = args.lambda0 finished = False t = 0 while not finished: epoch_start = time.time() lr = args.learning_rate_omega model.drop_path_prob = 0 logging.info('epoch %d lr %e flops_weight %e', epoch, lr, flops_lambda) train_acc, train_obj = train(train_queue, model, criterion, optimizer_alpha, optimizer_omega, flops_lambda) logging.info('train_acc %f', train_acc) epoch_duration = time.time() - epoch_start logging.info('epoch time: %ds.', epoch_duration) pruning_epoch = prune_op(model, args) current_flops = model.current_flops() + args.base_flops logging.info('current model flops %e', current_flops) if pruning_epoch >= args.pruning_n0: flops_lambda_delta = args.lambda0 flops_lambda = flops_lambda / args.c0 else: flops_lambda_delta = flops_lambda_delta * args.c0 flops_lambda = flops_lambda + flops_lambda_delta if current_flops < args.min_flops: finished = True if pruning_epoch == 0: t = t + 1 else: if t > args.stable_round: genotype = model.genotype() logging.info('genotype = %s', genotype) t = 0 epoch += 1
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) #用来优化w的优化器 optimizer = torch.optim.SGD( model.parameters(), #优化器更新的参数,这里更新的是w args.learning_rate, #初始值是0.025,使用的余弦退火调度更新学习率,每个epoch的学习率都不一样 momentum=args.momentum, #0.9 weight_decay=args.weight_decay) #正则化参数3e-4 train_transform, valid_transform = utils._data_transforms_cifar10(args) train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[:split] ), #自定义从样本中取数据的策略,当train_portion=0.5时,就是前一半的数据用于train pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:num_train]), #数据集中后一半的数据用于验证 pin_memory=True, num_workers=2) # 学习率更新参数,每次迭代调整不同的学习率 scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( #使用余弦退火调度设置各组参数组的学习率 optimizer, float(args.epochs), eta_min=args.learning_rate_min) # 创建用于更新α的architect architect = Architect(model, args) # 经历50个epoch后搜索完毕 for epoch in range(args.epochs): scheduler.step() lr = scheduler.get_lr()[0] #得到本次迭代的学习率lr logging.info('epoch %d lr %e', epoch, lr) genotype = model.genotype() #对应论文2.4 选出来权重值大的两个前驱节点,并把(操作,前驱节点)存下来 logging.info('genotype = %s', genotype) print(F.softmax(model.alphas_normal, dim=-1)) print(F.softmax(model.alphas_reduce, dim=-1)) # training train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) utils.save(model, os.path.join(args.save, 'weights.pt'))
def main():
    start = time.time()

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    torch.cuda.set_device(config.local_rank % len(config.gpus))
    torch.distributed.init_process_group(backend='nccl', init_method='env://')
    config.world_size = torch.distributed.get_world_size()
    config.total_batch = config.world_size * config.batch_size

    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)
    torch.backends.cudnn.benchmark = True

    CLASSES = 1000
    channels = [32, 16, 24, 40, 80, 96, 192, 320, 1280]
    steps = [1, 1, 2, 3, 4, 3, 3, 1, 1]
    strides = [2, 1, 2, 2, 1, 2, 1, 1, 1]

    criterion = nn.CrossEntropyLoss()
    criterion_latency = LatencyLoss(channels[2:9], steps[2:8], strides[2:8])
    criterion = criterion.cuda(config.gpus)
    criterion_latency = criterion_latency.cuda(config.gpus)

    model = Network(channels, steps, strides, CLASSES, criterion)
    model = model.to(device)
    # model = DDP(model, delay_allreduce=True)
    # A custom loss cannot use model.parameters() on an apex-wrapped model
    # (https://github.com/NVIDIA/apex/issues/457), so wrap with torch's DDP:
    model = torch.nn.parallel.DistributedDataParallel(
        model, device_ids=[config.local_rank], output_device=config.local_rank)
    logger.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                config.w_lr,
                                momentum=config.w_momentum,
                                weight_decay=config.w_weight_decay)

    train_data = get_imagenet_iter_torch(
        type='train',
        # image_dir="/googol/atlas/public/cv/ILSVRC/Data/"
        # use soft link `mkdir ./data/imagenet && ln -s /googol/atlas/public/cv/ILSVRC/Data/CLS-LOC/* ./data/imagenet/`
        image_dir=config.data_path + config.dataset.lower(),
        batch_size=config.batch_size,
        num_threads=config.workers,
        world_size=config.world_size,
        local_rank=config.local_rank,
        crop=224,
        device_id=config.local_rank,
        num_gpus=config.gpus,
        portion=config.train_portion)
    valid_data = get_imagenet_iter_torch(
        type='train',
        # image_dir="/googol/atlas/public/cv/ILSVRC/Data/"
        # use soft link `mkdir ./data/imagenet && ln -s /googol/atlas/public/cv/ILSVRC/Data/CLS-LOC/* ./data/imagenet/`
        image_dir=config.data_path + "/" + config.dataset.lower(),
        batch_size=config.batch_size,
        num_threads=config.workers,
        world_size=config.world_size,
        local_rank=config.local_rank,
        crop=224,
        device_id=config.local_rank,
        num_gpus=config.gpus,
        portion=config.val_portion)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(config.epochs), eta_min=config.w_lr_min)

    if len(config.gpus) > 1:
        architect = Architect(model.module, config)
    else:
        architect = Architect(model, config)

    best_top1 = 0.
    for epoch in range(config.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logger.info('epoch %d lr %e', epoch, lr)
        # print(F.softmax(model.alphas_normal, dim=-1))
        # print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_top1, train_loss = train(train_data, valid_data, model,
                                       architect, criterion,
                                       criterion_latency, optimizer, lr,
                                       epoch, writer)
        logger.info('Train top1 %f', train_top1)

        # validation
        top1 = 0
        if config.epochs - epoch <= 1:
            top1, loss = infer(valid_data, model, epoch, criterion, writer)
            logger.info('valid top1 %f', top1)

        if len(config.gpus) > 1:
            genotype = model.module.genotype()
        else:
            genotype = model.genotype()
        logger.info("genotype = {}".format(genotype))

        # genotype as an image
        plot_path = os.path.join(config.plot_path, "EP{:02d}".format(epoch + 1))
        caption = "Epoch {}".format(epoch + 1)
        plot(genotype.normal, plot_path + "-normal")
        plot(genotype.reduce, plot_path + "-reduce")

        # save
        if best_top1 < top1:
            best_top1 = top1
            best_genotype = genotype
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)
        print("")

    utils.time(time.time() - start)
    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
    logger.info("Best Genotype = {}".format(best_genotype))
def main():
    path_to_best_loss_eval = "./generator/best_loss_model_{}.csv".format(args.seed)
    path_to_best_model = "./generator/best_model_{}.pth".format(args.seed)

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True,
                              transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    # ================= DONAS ==========================
    low_flops = args.low_flops
    high_flops = args.high_flops

    nodes, edges = model.get_arch_param_nums()
    lookup_table = LookUpTable(edges, nodes)
    arch_param_nums = nodes * edges

    generator = get_generator(20)
    generator = generator.cuda()

    backbone_pool = BackbonePool(nodes, edges, lookup_table, arch_param_nums)
    backbone = backbone_pool.get_backbone((low_flops + high_flops) / 2)

    g_optimizer = torch.optim.Adam(generator.parameters(),
                                   weight_decay=0,
                                   lr=0.001,
                                   betas=(0.5, 0.999))
    tau = 5
    best_hc_loss = 100000
    best_top1 = 0  # track the best validation accuracy (missing in the original)
    # ================= DONAS ==========================

    architect = Architect(model, generator, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr,
                                     low_flops, high_flops, backbone, tau)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion, generator,
                                     backbone, (low_flops + high_flops) // 2,
                                     lookup_table)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

        evalulate_metric, total_loss, kendall_tau = evalulate_generator(
            generator, backbone, lookup_table, low_flops, high_flops)
        if total_loss < best_hc_loss:
            logger.log("Best hc loss : {}. Save model!".format(total_loss))
            save_generator_evaluate_metric(evalulate_metric,
                                           path_to_best_loss_eval)
            best_hc_loss = total_loss

        if valid_acc > best_top1:
            logger.log("Best top1-avg : {}. Save model!".format(valid_acc))
            save_model(generator, path_to_best_model)
            best_top1 = valid_acc

        tau *= 0.95
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = False
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    cudnn.deterministic = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    if args.loss_func == 'cce':
        criterion = nn.CrossEntropyLoss().cuda()
    elif args.loss_func == 'rll':
        criterion = utils.RobustLogLoss(alpha=args.alpha).cuda()
    else:
        # double braces escape the set literal inside str.format
        assert False, "Invalid loss function '{}' given. Must be in {{'cce', 'rll'}}".format(args.loss_func)

    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    model.train()
    model.apply(weights_init)
    nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    train_transform, valid_transform = utils._data_transforms_cifar10(args)

    # Load dataset
    if args.dataset == 'cifar10':
        train_data = CIFAR10(root=args.data, train=True, gold=False,
                             gold_fraction=0.0,
                             corruption_prob=args.corruption_prob,
                             corruption_type=args.corruption_type,
                             transform=train_transform, download=True,
                             seed=args.seed)
        gold_train_data = CIFAR10(root=args.data, train=True, gold=True,
                                  gold_fraction=1.0,
                                  corruption_prob=args.corruption_prob,
                                  corruption_type=args.corruption_type,
                                  transform=train_transform, download=True,
                                  seed=args.seed)
    elif args.dataset == 'cifar100':
        train_data = CIFAR100(root=args.data, train=True, gold=False,
                              gold_fraction=0.0,
                              corruption_prob=args.corruption_prob,
                              corruption_type=args.corruption_type,
                              transform=train_transform, download=True,
                              seed=args.seed)
        gold_train_data = CIFAR100(root=args.data, train=True, gold=True,
                                   gold_fraction=1.0,
                                   corruption_prob=args.corruption_prob,
                                   corruption_type=args.corruption_type,
                                   transform=train_transform, download=True,
                                   seed=args.seed)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    clean_train_queue = torch.utils.data.DataLoader(
        gold_train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=0)
    noisy_train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=0)
    clean_valid_queue = torch.utils.data.DataLoader(
        gold_train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
        pin_memory=True, num_workers=0)
    noisy_valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
        pin_memory=True, num_workers=0)

    # cache every batch on the GPU; torch.no_grad() and non_blocking=True
    # replace the deprecated volatile=True Variables and the Python-2-only
    # cuda(async=True) of the original
    clean_train_list, clean_valid_list = [], []
    noisy_train_list, noisy_valid_list = [], []
    with torch.no_grad():
        for dst_list, queue in [
            (clean_train_list, clean_train_queue),
            (clean_valid_list, clean_valid_queue),
            (noisy_train_list, noisy_train_queue),
            (noisy_valid_list, noisy_valid_queue),
        ]:
            for input, target in queue:
                input = input.cuda()
                target = target.cuda(non_blocking=True)
                dst_list.append((input, target))

    for epoch in range(args.epochs):
        logging.info('Epoch %d, random architecture with fixed weights', epoch)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        logging.info(F.softmax(model.alphas_normal, dim=-1))
        logging.info(F.softmax(model.alphas_reduce, dim=-1))

        # training
        clean_train_acc, clean_train_obj = infer(clean_train_list, model,
                                                 criterion, kind='clean_train')
        logging.info('clean_train_acc %f, clean_train_loss %f',
                     clean_train_acc, clean_train_obj)
        noisy_train_acc, noisy_train_obj = infer(noisy_train_list, model,
                                                 criterion, kind='noisy_train')
        logging.info('noisy_train_acc %f, noisy_train_loss %f',
                     noisy_train_acc, noisy_train_obj)

        # validation
        clean_valid_acc, clean_valid_obj = infer(clean_valid_list, model,
                                                 criterion, kind='clean_valid')
        logging.info('clean_valid_acc %f, clean_valid_loss %f',
                     clean_valid_acc, clean_valid_obj)
        noisy_valid_acc, noisy_valid_obj = infer(noisy_valid_list, model,
                                                 criterion, kind='noisy_valid')
        logging.info('noisy_valid_acc %f, noisy_valid_loss %f',
                     noisy_valid_acc, noisy_valid_obj)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

        # Randomly change the alphas
        k = sum(1 for i in range(model._steps) for n in range(2 + i))
        num_ops = len(PRIMITIVES)
        model.alphas_normal.data.copy_(torch.randn(k, num_ops))
        model.alphas_reduce.data.copy_(torch.randn(k, num_ops))
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) if args.loss_func == 'cce': criterion = nn.CrossEntropyLoss().cuda() elif args.loss_func == 'rll': criterion = utils.RobustLogLoss().cuda() else: assert False, "Invalid loss function '{}' given. Must be in {'cce', 'rll'}".format( args.loss_func) model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) # Load dataset if args.gold_fraction == 0: train_data = CIFAR10(root=args.data, train=True, gold=False, gold_fraction=args.gold_fraction, corruption_prob=args.corruption_prob, corruption_type=args.corruption_type, transform=train_transform, download=True, seed=args.seed) if args.clean_valid: gold_train_data = CIFAR10(root=args.data, train=True, gold=True, gold_fraction=1.0, corruption_prob=args.corruption_prob, corruption_type=args.corruption_type, transform=train_transform, download=True, seed=args.seed) else: train_data = CIFAR10(root=args.data, train=True, gold=True, gold_fraction=args.gold_fraction, corruption_prob=args.corruption_prob, corruption_type=args.corruption_type, transform=train_transform, download=True, seed=args.seed) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True, num_workers=2) if args.clean_valid: valid_queue = torch.utils.data.DataLoader( gold_train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:]), pin_memory=True, num_workers=2) else: valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:]), pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) for epoch in range(args.epochs): scheduler.step() lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) genotype = model.genotype() logging.info('genotype = %s', genotype) print(F.softmax(model.alphas_normal, dim=-1)) print(F.softmax(model.alphas_reduce, dim=-1)) # training train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) utils.save(model, os.path.join(args.save, 'weights.pt'))
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    # Set random seeds and log GPU info.
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    # Set up the network and loss function.
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    # DARTS uses the Network class to store the alphas for optimizing the
    # architecture as well as the weights determined through the first level
    # of optimization.
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Load and transform CIFAR10.
    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    # Split the CIFAR10 training data into training and validation sets for search.
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    # Set up a torch data loader with 2 worker processes for the training data.
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)

    # Set up a torch data loader with 2 worker processes for the validation data.
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    # Cosine annealing learning rate schedule.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    # Start bi-level optimization.
    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        # Derive the current genotype from the architecture parameters.
        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # Train the shared weights and the architecture parameters.
        train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
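# The train() call above is where the bi-level update happens. A minimal
# sketch of the usual DARTS inner loop, assuming an Architect exposing the
# standard step() signature; this is a simplification, not this file's code.
import torch
import torch.nn as nn

def train_sketch(train_queue, valid_queue, model, architect, criterion, optimizer, lr, grad_clip=5.0):
    model.train()
    for x, y in train_queue:
        x, y = x.cuda(), y.cuda(non_blocking=True)
        # Architecture (alpha) step on a held-out validation batch.
        x_val, y_val = next(iter(valid_queue))
        x_val, y_val = x_val.cuda(), y_val.cuda(non_blocking=True)
        architect.step(x, y, x_val, y_val, lr, optimizer, unrolled=True)
        # Weight (w) step on the training batch.
        optimizer.zero_grad()
        loss = criterion(model(x), y)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
        optimizer.step()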
def main():
    start_time = time.time()
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    torch.cuda.set_device(args.gpu)
    reproducibility(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.model_name, CIFAR_CLASSES, sub_policies,
                    args.use_cuda, args.use_parallel,
                    temperature=args.temperature, criterion=criterion)
    # model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # train_transform, valid_transform = utils._data_transforms_cifar10(args)
    # train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    # train_data = AugmCIFAR10(
    #     root=args.data, train=True, download=True,
    #     transform=train_transform, ops_names=sub_policies, search=True, magnitudes=model.magnitudes)
    # valid_data = AugmCIFAR10(
    #     root=args.data, train=True, download=True,
    #     transform=train_transform, ops_names=sub_policies, search=False, magnitudes=model.magnitudes)
    # num_train = len(train_data)
    # indices = list(range(num_train))
    # split = int(np.floor(args.train_portion * num_train))
    # train_queue = torch.utils.data.DataLoader(
    #     train_data, batch_size=args.batch_size,
    #     sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
    #     pin_memory=True, num_workers=args.num_workers)
    # valid_queue = torch.utils.data.DataLoader(
    #     valid_data, batch_size=args.batch_size,
    #     sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
    #     pin_memory=True, num_workers=args.num_workers)
    train_queue, valid_queue = get_dataloaders(
        args.dataset, args.batch_size, args.num_workers,
        args.dataroot, sub_policies, model.magnitudes,
        args.cutout, args.cutout_length,
        split=args.train_portion, split_idx=0, target_lb=-1)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        # logging.info('genotype = %s', genotype)
        print_genotype(genotype)

        # logging.info('%s' % str(torch.nn.functional.softmax(model.ops_weights, dim=-1)))
        probs = model.ops_weights
        # logging.info('%s' % str(probs / probs.sum(-1, keepdim=True)))
        logging.info('%s' % str(torch.nn.functional.softmax(probs, dim=-1)))
        logging.info('%s' % str(model.probabilities.clamp(0, 1)))
        logging.info('%s' % str(model.magnitudes.clamp(0, 1)))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

    end_time = time.time()
    elapsed = end_time - start_time
    logging.info('elapsed time: %.3f Hours' % (elapsed / 3600.))
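# The temperature passed to Network above suggests relaxed categorical
# sampling over augmentation sub-policies. A minimal sketch of that idea
# using F.gumbel_softmax; this is an assumption about the internals, not the
# repository's actual code.
import torch
import torch.nn.functional as F

ops_weights = torch.randn(5, requires_grad=True)  # logits over 5 sub-policies
temperature = 0.5
# Differentiable near-one-hot sample; lower tau means closer to a hard argmax.
sample = F.gumbel_softmax(ops_weights, tau=temperature, hard=False)
print(sample, sample.sum())  # the sample sums to 1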
def main():
    np.random.seed(args.seed)
    cudnn.benchmark = True
    cudnn.enabled = True
    torch.manual_seed(args.seed)

    # ================================================
    total, used = os.popen(
        'nvidia-smi --query-gpu=memory.total,memory.used --format=csv,nounits,noheader'
    ).read().split('\n')[args.gpu].split(',')
    total = int(total)
    used = int(used)
    print('Total GPU mem:', total, 'used:', used)
    # try:
    #     block_mem = 0.85 * (total - used)
    #     print(block_mem)
    #     x = torch.empty((256, 1024, int(block_mem))).cuda()
    #     del x
    # except RuntimeError as err:
    #     print(err)
    #     block_mem = 0.8 * (total - used)
    #     print(block_mem)
    #     x = torch.empty((256, 1024, int(block_mem))).cuda()
    #     del x
    #
    # print('reuse mem now ...')
    # ================================================

    logging.info('GPU device = %d' % args.gpu)
    logging.info("args = %s", args)
    logging.info(f"seed = {args.seed}")

    criterion = nn.CrossEntropyLoss().to(device)
    model = Network(args.init_ch, 10, args.layers, criterion).to(device)
    logging.info("Total param size = %f MB", utils.count_parameters_in_MB(model))

    # This optimizer updates only the network weights.
    optimizer = optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.wd)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    num_train = len(train_data)  # 50000
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))  # 25000

    # Use num_workers=0 when running under the PyDev debugger.
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batchsz,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=0 if 'pydevd' in sys.modules else 4)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batchsz,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
        pin_memory=True, num_workers=0 if 'pydevd' in sys.modules else 4)

    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs), eta_min=args.lr_min)

    arch = Arch(model, args)

    lines = ['epoch\ttrain_acc\tval_acc']
    genotype = ''
    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('\nEpoch: %d lr: %e', epoch, lr)

        genotype = model.genotype()
        logging.info('Genotype: %s', genotype)
        # print(F.softmax(model.alphas_normal, dim=-1))
        # print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, arch, criterion, optimizer, lr)
        logging.info('train acc: %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid acc: %f', valid_acc)

        lines.append(f'{epoch}\t{train_acc}\t{valid_acc}')
        timebudget.report()

        utils.save(model, os.path.join(args.exp_path, 'search.pt'))

    pathlib.Path(os.path.join(args.exp_path, 'search.tsv')).write_text('\n'.join(lines))
    pathlib.Path(os.path.join(args.exp_path, 'genotype.txt')).write_text(str(genotype))
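# The nvidia-smi parsing above is brittle: it assumes one output line per GPU
# and a fixed CSV field order. A minimal sketch of the same query through
# PyTorch itself, assuming a PyTorch version where torch.cuda.mem_get_info
# is available (1.10+).
import torch

if torch.cuda.is_available():
    free_b, total_b = torch.cuda.mem_get_info()  # bytes, for the current device
    print('Total GPU mem (MiB):', total_b // 2**20,
          'used:', (total_b - free_b) // 2**20)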
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    # torch.cuda.set_device(args.gpu)
    gpus = [int(i) for i in args.gpu.split(',')]
    if len(gpus) == 1:
        torch.cuda.set_device(int(args.gpu))
    # cudnn.benchmark = True
    torch.manual_seed(args.seed)
    # cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %s' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    if len(gpus) > 1:
        print('using DataParallel on GPUs:', gpus)
        model = nn.parallel.DataParallel(model, device_ids=gpus, output_device=gpus[0])
        model = model.module

    # Exclude the architecture parameters from the weight optimizer.
    arch_params = list(map(id, model.arch_parameters()))
    weight_params = filter(lambda p: id(p) not in arch_params, model.parameters())

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        # model.parameters(),
        weight_params,
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)
    # optimizer = nn.DataParallel(optimizer, device_ids=gpus)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, criterion, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        with torch.no_grad():
            valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
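# A small check of the id()-based parameter split used above: the weight
# optimizer must not see the architecture parameters, or SGD would update
# the alphas too. A minimal self-contained sketch with a toy module, not the
# repository's Network class.
import torch
import torch.nn as nn

net = nn.Linear(4, 2)
alphas = nn.Parameter(torch.randn(3, 8))  # stand-in architecture parameters
all_params = list(net.parameters()) + [alphas]
arch_ids = {id(alphas)}
weight_params = [p for p in all_params if id(p) not in arch_ids]
assert len(weight_params) == 2 and all(p is not alphas for p in weight_params)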
def __init__(self, save_path, seed, batch_size, grad_clip, epochs, resume_iter=None, init_channels=16):
    args = {}
    args['data'] = '/data/mzhang3/randomNAS_own/data'
    args['epochs'] = epochs
    args['learning_rate'] = 0.025
    args['batch_size'] = batch_size
    args['learning_rate_min'] = 0.001
    args['momentum'] = 0.9
    args['weight_decay'] = 3e-4
    args['init_channels'] = init_channels
    args['layers'] = 8
    args['drop_path_prob'] = 0.3
    args['grad_clip'] = grad_clip
    args['train_portion'] = 0.5
    args['seed'] = seed
    args['log_interval'] = 50
    args['save'] = save_path
    args['gpu'] = 0
    args['cuda'] = True
    args['cutout'] = False
    args['cutout_length'] = 16
    args['report_freq'] = 50
    args = AttrDict(args)
    self.args = args
    self.seed = seed

    np.random.seed(args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = False
    cudnn.enabled = True
    cudnn.deterministic = True
    torch.cuda.manual_seed_all(args.seed)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=False, transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    # worker_init_fn must be a callable taking the worker id; the original
    # passed the result of np.random.seed(args.seed) (i.e. None), which seeds
    # the parent process once and leaves workers unseeded.
    self.train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=0,
        worker_init_fn=lambda worker_id: np.random.seed(args.seed))

    self.valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=32,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=0,
        worker_init_fn=lambda worker_id: np.random.seed(args.seed))

    self.train_iter = iter(self.train_queue)
    self.valid_iter = iter(self.valid_queue)

    self.steps = 0
    self.epochs = 0
    self.total_loss = 0
    self.start_time = time.time()

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    self.criterion = criterion

    model = Network(args.init_channels, 10, args.layers, self.criterion)
    model = model.cuda()
    self.model = model

    # try:
    #     self.load()
    #     logging.info('loaded previously saved weights')
    # except Exception as e:
    #     print(e)

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(self.model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    self.optimizer = optimizer

    self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    if resume_iter is not None:
        self.steps = resume_iter
        self.epochs = int(resume_iter / len(self.train_queue))
        logging.info("Resuming from epoch %d" % self.epochs)
        self.objs = utils.AvgrageMeter()
        self.top1 = utils.AvgrageMeter()
        self.top5 = utils.AvgrageMeter()
        for i in range(self.epochs):
            self.scheduler.step()

    size = 0
    for p in model.parameters():
        size += p.nelement()
    logging.info('param size: {}'.format(size))

    total_params = sum(x.data.nelement() for x in model.parameters())
    logging.info('Args: {}'.format(args))
    logging.info('Model total parameters: {}'.format(total_params))
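# AttrDict is presumably a tiny helper exposing dict keys as attributes, so
# args['seed'] can also be read as args.seed. A minimal sketch of such a
# class; this is an assumption about the actual utility, not its source.
class AttrDict(dict):
    """Dictionary whose keys are also readable/writable as attributes."""
    def __init__(self, *a, **kw):
        super().__init__(*a, **kw)
        self.__dict__ = self  # attribute access shares the dict's storage

args = AttrDict({'seed': 2, 'gpu': 0})
assert args.seed == 2 and args['gpu'] == 0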
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    dataset = Dataset(args.dataset)
    train_examples = torch.from_numpy(dataset.get_train().astype('int64'))
    valid_examples = torch.from_numpy(dataset.get_valid().astype('int64'))
    CLASSES = dataset.get_shape()[0]

    criterion = nn.CrossEntropyLoss(reduction='mean')
    # criterion = CrossEntropyLabelSmooth(CLASSES, args.label_smooth)
    criterion = criterion.cuda()

    regularizer = {
        'N2': N2(args.reg),
        'N3': N3(args.reg),
    }[args.regularizer]

    model = Network(args.channels, CLASSES, args.layers, criterion, regularizer,
                    args.interleaved, dataset.get_shape(), args.emb_dim,
                    args.init, args.steps)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = {
        'Adagrad': lambda: optim.Adagrad(model.parameters(), lr=args.learning_rate),
        # momentum=args.momentum,
        # weight_decay=args.weight_decay)
        'Adam': lambda: optim.Adam(model.parameters(), lr=args.learning_rate,
                                   betas=(args.decay1, args.decay2)),
        'SGD': lambda: optim.SGD(model.parameters(), lr=args.learning_rate)
    }[args.optimizer]()
    # optimizer = torch.optim.SGD(
    #     model.parameters(),
    #     args.learning_rate,
    #     # TODO can we reintroduce these?
    #     momentum=args.momentum,
    #     weight_decay=args.weight_decay)

    train_queue = torch.utils.data.DataLoader(
        train_examples, batch_size=args.batch_size,
        shuffle=True,
        # sampler=torch.utils.data.sampler.RandomSampler(),
        pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        valid_examples, batch_size=args.batch_size,
        shuffle=True,
        # sampler=torch.utils.data.sampler.RandomSampler(),
        pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    best_acc = 0
    patience = 0
    curve = {'valid': [], 'test': []}

    architect = Architect(model, args)

    for epoch in range(args.epochs):
        model.epoch = epoch
        print('model temperature param', 1.05 ** model.epoch)
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax((1.05 ** epoch) * model.alphas_normal, dim=-1))

        train_epoch(train_examples, train_queue, valid_queue, model, architect,
                    criterion, optimizer, regularizer, args.batch_size, args.learning_rate)

        if (epoch + 1) % args.report_freq == 0:
            valid, test = [
                avg_both(*dataset.eval(model, split, -1 if split != 'train' else 50000))
                for split in ['valid', 'test']
            ]
            curve['valid'].append(valid)
            curve['test'].append(test)
            # curve['train'].append(train)

            # print("\t TRAIN: ", train)
            print("\t VALID: ", valid)
            print("\t TEST: ", test)

            is_best = False
            if valid['MRR'] > best_acc:
                best_acc = valid['MRR']
                is_best = True
                patience = 0
            else:
                patience += 1
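# N2/N3 above look like the weighted p-norm regularizers on embedding factors
# from the knowledge-graph-embedding literature. A minimal sketch of an
# N3-like penalty under that reading; the repository's N3 class may differ.
import torch

class N3Sketch:
    def __init__(self, weight):
        self.weight = weight

    def __call__(self, factors):
        # Sum of cubed absolute values over each factor, averaged per example.
        norm = sum(torch.sum(torch.abs(f) ** 3) for f in factors)
        return self.weight * norm / factors[0].shape[0]

reg = N3Sketch(1e-2)
print(reg([torch.randn(16, 32), torch.randn(16, 32)]))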
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    in_channels, num_classes, dataset_in_torch = utils.dataset_fields(args)  # new

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, in_channels, num_classes, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(  # SGD for the network weights
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    train_data = utils.dataset_split_and_transform(dataset_in_torch, args)  # new

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)
    prune = Prune(args.epochs_pre_prune)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        logging.info(F.softmax(model.alphas_normal, dim=-1))
        logging.info(F.softmax(model.alphas_reduce, dim=-1))

        # Pruning
        if epoch > args.epochs_pre_prune:
            if epoch == args.epochs - 1:
                # 90 alphas must be pruned by the end of search.
                prune.num_to_zero = 90 - len(prune.zeros_indices_alphas_normal)
            if args.sparse == 'sparse':
                prune.num_to_zero_sparse(epoch, args)
            prune.prune_all_alphas(model)

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
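# The Prune helper is defined elsewhere. A plausible sketch of the core
# operation it needs: zero the num_to_zero smallest-magnitude alphas and
# return their indices so they can stay pruned. Names and behavior here are
# illustrative assumptions only.
import torch

def zero_smallest_alphas(alphas, num_to_zero):
    """Zero the num_to_zero entries of `alphas` with the smallest |value|."""
    flat = alphas.data.abs().flatten()
    _, idx = torch.topk(flat, num_to_zero, largest=False)
    alphas.data.view(-1)[idx] = 0.0
    return idx  # keep these indices zeroed on later epochs

alphas = torch.randn(14, 8, requires_grad=True)
zeroed = zero_smallest_alphas(alphas, 10)
assert (alphas.data.view(-1)[zeroed] == 0).all()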
def main():
    args.exp_path /= f'{args.gpu}_{time.strftime("%Y%m%d-%H%M%S")}'
    utils.create_exp_dir(Path(args.exp_path), scripts_to_save=glob.glob('*.py'))

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format=log_format, datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(args.exp_path / 'log.txt')
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    if args.seed is None:
        raise Exception('a random seed must be specified.')

    np.random.seed(args.seed)
    cudnn.benchmark = True
    cudnn.enabled = True
    torch.manual_seed(args.seed)

    # ================================================
    # total, used = os.popen(
    #     'nvidia-smi --query-gpu=memory.total,memory.used --format=csv,nounits,noheader'
    # ).read().split('\n')[args.gpu].split(',')
    # total = int(total)
    # used = int(used)
    # print('Total GPU mem:', total, 'used:', used)
    # try:
    #     block_mem = 0.85 * (total - used)
    #     print(block_mem)
    #     x = torch.empty((256, 1024, int(block_mem))).cuda()
    #     del x
    # except RuntimeError as err:
    #     print(err)
    #     block_mem = 0.8 * (total - used)
    #     print(block_mem)
    #     x = torch.empty((256, 1024, int(block_mem))).cuda()
    #     del x
    #
    # print('reuse mem now ...')
    # ================================================

    logging.info(f'GPU device = {args.gpu}')
    logging.info(f'args = {args}')

    criterion = nn.CrossEntropyLoss().to(device)
    setting = args.location
    model = Network(args.init_ch, 10, args.layers, criterion, setting)

    checkpoint = None
    previous_epochs = 0
    if args.checkpoint_path:
        checkpoint = torch.load(args.checkpoint_path)
        utils.load(model, checkpoint['state_dict'], False)
        previous_epochs = checkpoint['epoch']
        args.epochs -= previous_epochs
        if args.epochs <= 0:
            raise Exception('args.epochs is too small.')

    if use_DataParallel:
        print('use Data Parallel')
        model = nn.parallel.DataParallel(model)
        model = model.cuda()
        module = model.module
        torch.cuda.manual_seed_all(args.seed)
    else:
        model = model.to(device)
        module = model

    param_size = utils.count_parameters_in_MB(model)
    logging.info(f'param size = {param_size}MB')

    # Exclude architecture/attention parameters from the weight optimizer.
    arch_and_attn_params = list(
        map(id,
            module.arch_and_attn_parameters()
            if use_DataParallel else model.arch_and_attn_parameters()))
    weight_params = filter(
        lambda p: id(p) not in arch_and_attn_params,
        module.parameters() if use_DataParallel else model.parameters())

    optimizer = optim.SGD(weight_params, args.lr, momentum=args.momentum, weight_decay=args.wd)
    if checkpoint:
        optimizer.load_state_dict(checkpoint['optimizer'])

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    num_train = len(train_data)  # 50000
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))  # 25000

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batchsz,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=8)  # increased from 2

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batchsz,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
        pin_memory=True, num_workers=8)

    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epochs, eta_min=args.lr_min)
    if checkpoint:
        scheduler.load_state_dict(checkpoint['scheduler'])

    arch = Arch(model, criterion, args)
    if checkpoint:
        arch.optimizer.load_state_dict(checkpoint['arch_optimizer'])

    for epoch in tqdm(range(args.epochs), desc='Total Progress'):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info(f'\nEpoch: {epoch} lr: {lr}')

        gen = module.genotype()
        logging.info(f'Genotype: {gen}')
        print(F.softmax(module.alphas_normal, dim=-1))
        print(F.softmax(module.alphas_reduce, dim=-1))
        if module.betas_normal is not None:
            print(F.softmax(module.betas_normal, dim=-1))
            print(F.softmax(module.betas_reduce, dim=-1))
        if module.gammas_normal is not None:
            print(F.softmax(module.gammas_normal, dim=-1))
            print(F.softmax(module.gammas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, arch,
                                     criterion, optimizer, lr, epoch + 1)
        logging.info(f'train acc: {train_acc}')

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion, epoch + 1)
        logging.info(f'valid acc: {valid_acc}')

        utils.save(model, args.exp_path / 'search.pt')
        utils.save_checkpoint(
            {
                'epoch': epoch + 1 + previous_epochs,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'arch_optimizer': arch.optimizer.state_dict(),
                'scheduler': scheduler.state_dict()
            }, False, args.exp_path)

    gen = module.genotype()
    gen_path = args.exp_path / 'genotype.json'
    utils.save_genotype(gen, gen_path)
    logging.info(f'Result genotype: {gen}')
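# utils.save_checkpoint above follows the common PyTorch checkpoint recipe.
# A minimal sketch of what such a helper typically does; the repository's
# version may differ, e.g. in file names.
import os
import shutil
import torch

def save_checkpoint_sketch(state, is_best, save_dir):
    """Save a training state dict; keep a copy of the best one so far."""
    path = os.path.join(save_dir, 'checkpoint.pth.tar')
    torch.save(state, path)
    if is_best:
        shutil.copyfile(path, os.path.join(save_dir, 'model_best.pth.tar'))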
def main(args):
    global log
    log = logging.getLogger("train_search")

    CIFAR_CLASSES = 10
    if args.set == 'cifar100':
        CIFAR_CLASSES = 100

    if not torch.cuda.is_available():
        log.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    log.info('gpu device = %d' % args.gpu)
    log.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    log.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, _ = utils._data_transforms_cifar10(args)
    if args.set == 'cifar100':
        train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
    else:
        train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    targets = train_data.targets
    train_idx = np.arange(len(targets))
    if args.subsample > 0:
        # Stratified subsampling keeps the class distribution intact.
        train_idx, _ = train_test_split(train_idx,
                                        test_size=1 - args.subsample,
                                        shuffle=True,
                                        stratify=targets)

    num_train = len(train_idx)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(train_idx[indices[:split]]),
        pin_memory=True, num_workers=4)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(train_idx[indices[split:num_train]]),
        pin_memory=True, num_workers=4)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.epochs, eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    train_acc = None
    valid_acc = None
    l1_loss = torch.zeros(1)
    l2_loss = torch.zeros(1)
    criterion_loss = torch.zeros(1)

    genotype = model.genotype()
    log.info('initial genotype = %s', genotype)

    for epoch in range(args.epochs):
        lr = scheduler.get_last_lr()[0]
        log.info('epoch %d lr %e', epoch, lr)
        # model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        # training
        train_acc, train_obj, l1_loss, l2_loss, criterion_loss = train(
            train_queue, valid_queue, model, architect, criterion, optimizer,
            lr, epoch, args.grad_clip, args.report_lines, args.unrolled,
            args.criterion_weight, args.l1_weight, args.l2_weight)
        scheduler.step()
        log.info('train_acc %f', train_acc)
        log.info('%s %f', L1_LOSS, l1_loss)
        log.info('%s %f', L2_LOSS, l2_loss)
        log.info('criterion_loss %f', criterion_loss)

        # validation (final epoch only)
        if args.epochs - epoch <= 1:
            valid_acc, valid_obj = infer(valid_queue, model, criterion, args.report_lines)
            log.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

        genotype = model.genotype()
        log.info('genotype = %s', genotype)

    log.info('last genotype = %s', genotype)

    model = TrainNetwork(36, CIFAR_CLASSES, 20, False, genotype)
    model_size_mb = utils.count_parameters_in_MB(model)
    log.info("Train model param size = %.2fMB", model_size_mb)

    return {
        L1_LOSS: {
            tuple([args.l1_weight, args.criterion_weight]): {
                TRAIN_ACC: train_acc,
                VALID_ACC: valid_acc,
                REG_LOSS: l1_loss.cpu().data.item(),
                CRITERION_LOSS: criterion_loss.cpu().data.item(),
                SIZE: model_size_mb,
                GENOTYPE: genotype
            }
        },
        L2_LOSS: {
            tuple([args.l2_weight, args.criterion_weight]): {
                TRAIN_ACC: train_acc,
                VALID_ACC: valid_acc,
                REG_LOSS: l2_loss.cpu().data.item(),
                CRITERION_LOSS: criterion_loss.cpu().data.item(),
                SIZE: model_size_mb,
                GENOTYPE: genotype
            }
        }
    }
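# A short usage note on the nested dict returned above: results are keyed
# first by regularizer name (L1_LOSS / L2_LOSS), then by the
# (reg_weight, criterion_weight) pair. A hypothetical caller aggregating
# several runs might read it like this (illustrative fragment, assuming the
# same constants are in scope):
results = main(args)
for reg_name, by_weights in results.items():
    for (reg_w, crit_w), metrics in by_weights.items():
        print(reg_name, reg_w, crit_w, metrics[VALID_ACC], metrics[SIZE])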