def main():
    args.distributed = args.world_size > 1
    args.gpu = 0
    if args.distributed:
        args.gpu = args.rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size)
    if args.fp16:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

    model = models_bak.__dict__[args.arch]().cuda()
    if args.distributed:
        model = DDP(model)

    # start with 128px crops from the 160px-resized copy of the dataset
    data, train_sampler = torch_loader(f'{args.data}-sz/160', 128, 256)
    learner = Learner.from_model_data(model, data)
    learner.crit = F.cross_entropy
    learner.metrics = [accuracy, top5]
    if args.fp16:
        learner.half()

    wd = 2e-5
    update_model_dir(learner, args.save_dir)
    fit(learner, '1', 0.03, 1, train_sampler, wd)

    # progressive resizing: move to the 320px copy, then the full-size dataset
    data, train_sampler = torch_loader(f'{args.data}-sz/320', 128, 256)
    learner.set_data(data)
    fit(learner, '3', 1e-1, 1, train_sampler, wd)
    data, train_sampler = torch_loader(args.data, 128, 256)
    learner.set_data(data)
    fit(learner, '3', 1e-1, 1, train_sampler, wd)
    print('Finished!')
def main():
    args.distributed = args.world_size > 1
    args.gpu = 0
    if args.distributed:
        args.gpu = args.rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size)

    # create model
    if args.pretrained:
        model = models.__dict__[args.arch](pretrained=True)
    else:
        model = models.__dict__[args.arch]()
    model = model.cuda()
    if args.distributed:
        model = DDP(model)

    data1 = torch_loader(args.data, args.sz)
    learner = Learner.from_model_data(model, data1)
    learner.crit = F.cross_entropy
    learner.metrics = [accuracy, top1, top5]
    if args.fp16:
        learner.half()
    if args.prof:
        args.epochs = 1
    if args.use_clr:
        args.use_clr = tuple(map(float, args.use_clr.split(',')))

    # additional datasets for progressive resizing: 160px copy and 288px crops
    data0 = torch_loader(f'{args.data}-sz/160', 128)
    data2 = torch_loader(args.data, 288, bs=128, min_scale=0.5)

    update_model_dir(learner, args.save_dir)
    sargs = save_args('first_run', args.save_dir)
    def_phase = {'opt_fn': optim.SGD, 'wds': args.weight_decay}
    lr = args.lr
    # phase lengths as rounded fractions of the total epoch budget
    epoch_sched = [int(args.epochs * o + 0.5) for o in (0.47, 0.31, 0.17, 0.05)]
    if args.warmonly:
        data = [data0, data1]
        phases = [
            TrainingPhase(**def_phase, epochs=1, lr=(lr/100, lr), lr_decay=DecayType.LINEAR),
            TrainingPhase(**def_phase, epochs=1, lr=(lr, lr/100), lr_decay=DecayType.LINEAR)]
    else:
        data = [data0, data0, data1, data1, data1, data2, data2]
        phases = [
            TrainingPhase(**def_phase, epochs=4, lr=(lr/100, lr), lr_decay=DecayType.LINEAR),
            TrainingPhase(**def_phase, epochs=epoch_sched[0]-6, lr=lr),
            TrainingPhase(**def_phase, epochs=2, lr=lr),
            TrainingPhase(**def_phase, epochs=epoch_sched[1], lr=lr/10),
            TrainingPhase(**def_phase, epochs=epoch_sched[2]-2, lr=lr/100),
            TrainingPhase(**def_phase, epochs=2, lr=lr/100),
            TrainingPhase(**def_phase, epochs=epoch_sched[3], lr=lr/1000)]
    learner.fit_opt_sched(phases, data_list=data, loss_scale=args.loss_scale, **sargs)
    save_sched(learner.sched, args.save_dir)
    print('Finished!')
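# For concreteness, a quick check of the phase arithmetic in the main() above.
# This snippet is illustrative only: args.epochs = 40 is an assumed value, not
# one taken from the source.
epochs = 40
epoch_sched = [int(epochs * o + 0.5) for o in (0.47, 0.31, 0.17, 0.05)]
print(epoch_sched)  # [19, 12, 7, 2]
phase_epochs = [4, epoch_sched[0] - 6, 2, epoch_sched[1],
                epoch_sched[2] - 2, 2, epoch_sched[3]]
print(sum(phase_epochs))  # 40 -- the seven phases cover the full budget exactly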
def main():
    args.distributed = args.world_size > 1
    args.gpu = 0
    if args.distributed:
        args.gpu = args.rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size)
    if args.fp16:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."
    if args.cycle_len > 1:
        args.cycle_len = int(args.cycle_len)

    # create model
    if args.pretrained:
        model = models.__dict__[args.arch](pretrained=True)
    else:
        model = models.__dict__[args.arch]()
    model = model.cuda()
    if args.distributed:
        model = DDP(model)

    data, train_sampler = torch_loader(f'{args.data}-sz/160', 128, 256)
    learner = Learner.from_model_data(model, data)
    learner.crit = F.cross_entropy
    learner.metrics = [accuracy, top5]
    if args.fp16:
        learner.half()
    if args.prof:
        args.epochs = 1
        args.cycle_len = 1
    if args.use_clr:
        args.use_clr = tuple(map(float, args.use_clr.split(',')))

    wd = 2e-5
    update_model_dir(learner, args.save_dir)
    #fit(learner, '1', 0.03, 1, train_sampler, wd)
    #data, train_sampler = torch_loader(args.data, 224, 192)
    #learner.set_data(data)
    #fit(learner, '3', 1e-1, 1, train_sampler, wd)
    data, train_sampler = torch_loader(args.data, 288, 128, min_scale=0.5)
    learner.set_data(data)
    #fit(learner, '6', 3e-4, 1, train_sampler, wd/2)
    data, train_sampler = torch_loader(args.data, 288, 64, min_scale=0.5)
    learner.set_data(data)
    print('Finished!')
def main():
    args.distributed = args.world_size > 1
    args.gpu = 0
    if args.distributed:
        args.gpu = args.rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size)
    if args.fp16:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."
    if args.cycle_len > 1:
        args.cycle_len = int(args.cycle_len)

    # create model
    if args.pretrained:
        model = models.__dict__[args.arch](pretrained=True)
    else:
        model = models.__dict__[args.arch]()
    model = model.cuda()
    if args.distributed:
        model = DDP(model)

    if args.train_128:
        data, train_sampler = torch_loader(f'{args.data}-160', 128)
    else:
        data, train_sampler = torch_loader(args.data, args.sz)
    learner = Learner.from_model_data(model, data)
    learner.crit = F.cross_entropy
    learner.metrics = [accuracy, top5]
    if args.fp16:
        learner.half()

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.resume.endswith('.h5'):
                args.resume = args.resume[:-len('.h5')]
            learner.load(args.resume)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.prof:
        args.epochs = 1
        args.cycle_len = 1
    if args.use_clr:
        args.use_clr = tuple(map(float, args.use_clr.split(',')))

    # 128x128
    if args.train_128:
        save_dir = f'{args.save_dir}/128'
        update_model_dir(learner, save_dir)
        sargs = save_args('first_run_128', save_dir)
        learner.fit(args.lr, args.epochs, cycle_len=args.cycle_len,
                    sampler=train_sampler, wds=args.weight_decay,
                    use_clr_beta=args.use_clr, loss_scale=args.loss_scale, **sargs)
        save_sched(learner.sched, save_dir)
        data, train_sampler = torch_loader(args.data, args.sz)
        learner.set_data(data)

    # Full size
    update_model_dir(learner, args.save_dir)
    sargs = save_args('first_run', args.save_dir)
    learner.fit(args.lr, args.epochs, cycle_len=args.cycle_len,
                sampler=train_sampler, wds=args.weight_decay,
                use_clr_beta=args.use_clr, loss_scale=args.loss_scale, **sargs)
    save_sched(learner.sched, args.save_dir)

    # TTA works ~50% of the time. Hoping top5 works better
    if args.use_tta:
        log_preds, y = learner.TTA()
        preds = np.mean(np.exp(log_preds), 0)
        acc = accuracy(torch.FloatTensor(preds), torch.LongTensor(y))
        t5 = top5(torch.FloatTensor(preds), torch.LongTensor(y))
        print('TTA acc:', acc)
        print('TTA top5:', t5[0])
        with open(f'{args.save_dir}/tta_accuracy.txt', "a", 1) as f:
            f.write(time.strftime("%Y-%m-%dT%H:%M:%S") +
                    f"\tTTA accuracy: {acc}\tTop5: {t5}\n")

    print('Finished!')
def main(): print("~~epoch\thours\ttop1Accuracy\n") start_time = datetime.now() args.distributed = args.world_size > 1 args.gpu = 0 if args.distributed: args.gpu = args.rank % torch.cuda.device_count() torch.cuda.set_device(args.gpu) dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size) if args.fp16: assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled." # create model if args.pretrained: model = models.__dict__[args.arch](pretrained=True) else: model = models.__dict__[args.arch]() model = model.cuda() n_dev = torch.cuda.device_count() if args.fp16: model = network_to_half(model) if args.distributed: model = DDP(model) elif args.dp: model = nn.DataParallel(model) args.batch_size *= n_dev global param_copy if args.fp16: param_copy = [ param.clone().type(torch.cuda.FloatTensor).detach() for param in model.parameters() ] for param in param_copy: param.requires_grad = True else: param_copy = list(model.parameters()) # define loss function (criterion) and optimizer criterion = nn.CrossEntropyLoss().cuda() optimizer = torch.optim.SGD(param_copy, args.lr, momentum=args.momentum, weight_decay=args.weight_decay) best_prec1 = 0 # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): checkpoint = torch.load( args.resume, map_location=lambda storage, loc: storage.cuda(args.gpu)) args.start_epoch = checkpoint['epoch'] best_prec1 = checkpoint['best_prec1'] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) else: print("=> no checkpoint found at '{}'".format(args.resume)) traindir = os.path.join(args.data, 'train') valdir = os.path.join(args.data, 'val') train_loader, val_loader, train_sampler = get_loaders(traindir, valdir) if args.evaluate: return validate(val_loader, model, criterion, epoch, start_time) for epoch in range(args.start_epoch, args.epochs): if args.distributed: train_sampler.set_epoch(epoch) adjust_learning_rate(optimizer, epoch) if epoch == args.epochs - 6: args.sz = 288 args.batch_size = 128 train_loader, val_loader, train_sampler, val_sampler = get_loaders( traindir, valdir, use_val_sampler=False, min_scale=0.5) if args.distributed: train_sampler.set_epoch(epoch) val_sampler.set_epoch(epoch) with warnings.catch_warnings(): warnings.simplefilter("ignore", category=UserWarning) train(train_loader, model, criterion, optimizer, epoch) if args.prof: break prec1 = validate(val_loader, model, criterion, epoch, start_time) if args.rank == 0: is_best = prec1 > best_prec1 best_prec1 = max(prec1, best_prec1) save_checkpoint( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'best_prec1': best_prec1, 'optimizer': optimizer.state_dict(), }, is_best)
def main():
    args.distributed = args.world_size > 1
    args.gpu = 0
    if args.distributed:
        args.gpu = args.rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size)
    if args.fp16:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

    # create model
    if args.pretrained:
        model = models.__dict__[args.arch](pretrained=True)
    else:
        model = models.__dict__[args.arch]()
    model = model.cuda()
    if args.distributed:
        model = DDP(model)

    data, train_sampler = torch_loader(f'{args.data}-160', 128, 256)
    learner = Learner.from_model_data(model, data)
    learner.crit = F.cross_entropy
    learner.metrics = [accuracy, top5]
    if args.fp16:
        learner.half()
    update_model_dir(learner, args.save_dir)

    wd = 1e-4
    lr = 0.1
    data, train_sampler = torch_loader(args.data, 224, 192)
    learner.set_data(data)
    fit(learner, '1', lr / 4, 1, train_sampler, wd)
    fit(learner, '1', lr / 2, 1, train_sampler, wd)
    fit(learner, '2', lr, 28, train_sampler, wd)
    #data, train_sampler = torch_loader(args.data, 224, 192)
    #learner.set_data(data)
    #fit(learner, '3', lr, 5, train_sampler, wd)
    fit(learner, '4', lr / 10, 25, train_sampler, wd)
    fit(learner, '5', lr / 100, 25, train_sampler, wd)
    data, train_sampler = torch_loader(args.data, 288, 128, min_scale=0.5)
    learner.set_data(data)
    fit(learner, '6', lr / 500, 10, train_sampler, wd)
    #save_sched(learner.sched, args.save_dir)
    #fit(learner, '7', 1e-4, 10, train_sampler, wd/4)

    # TTA works ~50% of the time. Hoping top5 works better
    print('\n TTA \n')
    log_preds, y = learner.TTA()
    preds = np.mean(np.exp(log_preds), 0)
    acc = accuracy(torch.FloatTensor(preds), torch.LongTensor(y))
    t5 = top5(torch.FloatTensor(preds), torch.LongTensor(y))
    print('TTA acc:', acc)
    print('TTA top5:', t5[0])
    with open(f'{args.save_dir}/tta_accuracy.txt', "a", 1) as f:
        f.write(time.strftime("%Y-%m-%dT%H:%M:%S") +
                f"\tTTA accuracy: {acc}\tTop5: {t5}\n")
    print('Finished!')
def main():
    global best_prec1, args

    args.distributed = args.world_size > 1
    # args.gpu = 0
    if args.distributed:
        # args.gpu = args.rank % torch.cuda.device_count()
        # torch.cuda.set_device(args.gpu)
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)
    if args.fp16:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()
    model = model.cuda()
    n_dev = torch.cuda.device_count()
    if args.fp16:
        model = network_to_half(model)
    if args.distributed:
        model = DDP(model)
        #args.lr *= n_dev
    elif args.dp:
        model = nn.DataParallel(model)
        args.batch_size *= n_dev
        #args.lr *= n_dev

    # fp32 master copy of the weights when training in fp16
    global param_copy
    if args.fp16:
        param_copy = [
            param.clone().type(torch.cuda.FloatTensor).detach()
            for param in model.parameters()
        ]
        for param in param_copy:
            param.requires_grad = True
    else:
        param_copy = list(model.parameters())

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(param_copy, args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(
                args.resume,
                map_location=lambda storage, loc: storage.cuda(args.gpu))
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(args.sz),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))
    train_sampler = (
        torch.utils.data.distributed.DistributedSampler(train_dataset)
        if args.distributed else None)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)
    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(
            valdir,
            transforms.Compose([
                transforms.Resize(int(args.sz * 1.14)),
                transforms.CenterCrop(args.sz),
                transforms.ToTensor(),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)
        if args.prof:
            break

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        if args.rank == 0:
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'optimizer': optimizer.state_dict(),
                }, is_best)
def main():
    args.distributed = args.world_size > 1
    args.gpu = 0
    if args.distributed:
        args.gpu = args.rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size)
    if args.fp16:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()
    model = model.cuda()
    if args.distributed:
        model = DDP(model)

    if args.train_128:
        data, train_sampler = fast_loader(f'{args.data}-160', 128)
    else:
        data, train_sampler = fast_loader(args.data, args.sz)
    learner = Learner.from_model_data(model, data)
    learner.crit = F.cross_entropy
    learner.metrics = [accuracy, top5]
    if args.fp16:
        learner.half()
    if args.prof:
        args.epochs = 1
        args.cycle_len = 0.01
    if args.use_clr:
        args.use_clr = tuple(map(float, args.use_clr.split(',')))

    # 128x128
    if args.train_128:
        save_dir = args.save_dir + '/128'
        update_model_dir(learner, save_dir)
        sargs = save_args('first_run_128', save_dir)
        learner.fit(args.lr, args.epochs, cycle_len=args.cycle_len,
                    train_sampler=train_sampler, wds=args.weight_decay,
                    use_clr_beta=args.use_clr, loss_scale=args.loss_scale, **sargs)
        save_sched(learner.sched, save_dir)
        data, train_sampler = fast_loader(args.data, args.sz)
        learner.set_data(data)

    # Full size
    update_model_dir(learner, args.save_dir)
    sargs = save_args('first_run', args.save_dir)
    learner.fit(args.lr, args.epochs, cycle_len=args.cycle_len,
                train_sampler=train_sampler, wds=args.weight_decay,
                use_clr_beta=args.use_clr, loss_scale=args.loss_scale, **sargs)
    save_sched(learner.sched, args.save_dir)

    if args.use_tta:
        print(accuracy(*learner.TTA()))

    print('Finished!')
def main():
    start_time = datetime.now()

    args.distributed = True  #args.world_size > 1
    args.gpu = 0
    if args.distributed:
        import socket
        args.gpu = args.rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        logger.info('| distributed init (rank {}): {}'.format(
            args.rank, args.distributed_init_method))
        dist.init_process_group(
            backend=args.dist_backend,
            init_method=args.distributed_init_method,
            world_size=args.world_size,
            rank=args.rank,
        )
        logger.info('| initialized host {} as rank {}'.format(
            socket.gethostname(), args.rank))
        #args.gpu = args.rank % torch.cuda.device_count()
        #torch.cuda.set_device(args.gpu)
        #logger.info('initializing...')
        #dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size)
        #logger.info('initialized')

    # create model
    if args.pretrained:
        model = models.__dict__[args.arch](pretrained=True)
    else:
        model = models.__dict__[args.arch](
            num_structured_layers=args.num_structured_layers,
            structure_type=args.structure_type,
            nblocks=args.nblocks,
            param=args.param)
    model = model.cuda()
    n_dev = torch.cuda.device_count()
    logger.info('Created model')
    if args.distributed:
        model = DDP(model)
    elif args.dp:
        model = nn.DataParallel(model)
        args.batch_size *= n_dev
    logger.info('Set up data parallel')

    # split parameters so that structured layers get no weight decay
    global structured_params
    global unstructured_params
    structured_params = filter(
        lambda p: hasattr(p, '_is_structured') and p._is_structured,
        model.parameters())
    unstructured_params = filter(
        lambda p: not (hasattr(p, '_is_structured') and p._is_structured),
        model.parameters())

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(
        [{'params': structured_params, 'weight_decay': 0.0},
         {'params': unstructured_params}],
        args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    logger.info('Created optimizer')

    best_acc1 = 0
    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            checkpoint = torch.load(
                args.resume,
                map_location=lambda storage, loc: storage.cuda(args.gpu))
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            logger.info("=> no checkpoint found at '{}'".format(args.resume))

    if args.small:
        traindir = os.path.join(args.data + '-sz/160', 'train')
        valdir = os.path.join(args.data + '-sz/160', 'val')
        args.sz = 128
    else:
        traindir = os.path.join(args.data, 'train')
        valdir = os.path.join(args.data, 'val')
        args.sz = 224
    train_loader, val_loader, train_sampler, val_sampler = get_loaders(
        traindir, valdir, use_val_sampler=True)
    logger.info('Loaded data')

    if args.evaluate:
        return validate(val_loader, model, criterion, args.start_epoch, start_time)

    logger.info(model)
    logger.info('| model {}, criterion {}'.format(
        args.arch, criterion.__class__.__name__))
    logger.info('| num. model params: {} (num. trained: {})'.format(
        sum(p.numel() for p in model.parameters()),
        sum(p.numel() for p in model.parameters() if p.requires_grad),
    ))

    for epoch in range(args.start_epoch, args.epochs):
        logger.info(f'Epoch {epoch}')
        adjust_learning_rate(optimizer, epoch)

        # progressive resizing: 224px at ~40% of training, 288px for the final ~8%
        if epoch == int(args.epochs * 0.4 + 0.5):
            traindir = os.path.join(args.data, 'train')
            valdir = os.path.join(args.data, 'val')
            args.sz = 224
            train_loader, val_loader, train_sampler, val_sampler = get_loaders(
                traindir, valdir)
        if epoch == int(args.epochs * 0.92 + 0.5):
            args.sz = 288
            args.batch_size = 128
            train_loader, val_loader, train_sampler, val_sampler = get_loaders(
                traindir, valdir, use_val_sampler=False, min_scale=0.5)

        if args.distributed:
            train_sampler.set_epoch(epoch)
            val_sampler.set_epoch(epoch)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UserWarning)
            train(train_loader, model, criterion, optimizer, epoch)
        if args.prof:
            break
        acc1 = validate(val_loader, model, criterion, epoch, start_time)

        if args.rank == 0:
            is_best = acc1 > best_acc1
            best_acc1 = max(acc1, best_acc1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                }, is_best)
def main():
    args.distributed = args.world_size > 1
    args.gpu = 0
    if args.distributed:
        args.gpu = args.rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size)
    if args.fp16:
        assert torch.backends.cudnn.enabled, "missing cudnn"

    # pick the model class from the CIFAR-10 set if available, else torchvision
    model = (cifar10models.__dict__[args.arch]
             if args.arch in cifar10_names else models.__dict__[args.arch])
    if args.pretrained:
        model = model(pretrained=True)
    else:
        model = model()
    model = model.cuda()
    if args.distributed:
        model = DDP(model)
    if args.data_parallel:
        n_dev = 4
        model = nn.DataParallel(model, range(n_dev))
        args.batch_size *= n_dev

    data, train_sampler, val_sampler = torch_loader(args.data, args.sz)
    learner = Learner.from_model_data(model, data)
    #print(learner.summary()); exit()
    learner.crit = F.cross_entropy
    learner.metrics = [accuracy]
    if args.fp16:
        learner.half()
    if args.prof:
        args.epochs, args.cycle_len = 1, 0.01
    if args.use_clr:
        args.use_clr = tuple(map(float, args.use_clr.split(',')))

    # Full size
    update_model_dir(learner, args.save_dir)
    sargs = save_args('first_run', args.save_dir)
    if args.warmup:
        # one low-LR epoch before the main schedule
        learner.fit(args.lr / 10, 1, cycle_len=1,
                    sampler=[train_sampler, val_sampler],
                    wds=args.weight_decay, use_clr_beta=(100, 1, 0.9, 0.8),
                    loss_scale=args.loss_scale, **sargs)
    learner.fit(args.lr, args.epochs, cycle_len=args.cycle_len,
                sampler=train_sampler, wds=args.weight_decay,
                use_clr_beta=args.use_clr, loss_scale=args.loss_scale, **sargs)
    save_sched(learner.sched, args.save_dir)
    print('Finished!')

    if args.use_tta:
        log_preds, y = learner.TTA()
        preds = np.mean(np.exp(log_preds), 0)
        acc = accuracy(torch.FloatTensor(preds), torch.LongTensor(y))
        print('TTA acc:', acc)
        with open(f'{args.save_dir}/tta_accuracy.txt', "a", 1) as f:
            f.write(time.strftime("%Y-%m-%dT%H:%M:%S") +
                    f"\tTTA accuracy: {acc}\n")
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

model = Net()
if args.cuda:
    model.cuda()

#=====START: ADDED FOR DISTRIBUTED======
'''
Wrap model in our version of DistributedDataParallel.
This must be done AFTER the model is converted to cuda.
'''
if args.distributed:
    model = DDP(model)
#=====END: ADDED FOR DISTRIBUTED======

optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
def main():
    global args, folder_save
    args = parser.parse_args()
    args.distributed = args.world_size > 1
    args.gpu = 0
    if args.distributed:
        args.gpu = args.rank % torch.cuda.device_count()
    print(args)

    # build a save-folder name from the sorted hyperparameters (excluding rank)
    opts = vars(args)
    name_log = ''.join('{}{}-'.format(key, val)
                       for key, val in sorted(opts.items()) if key != 'rank')
    name_log = name_log.replace('/', '-')
    name_log = name_log.replace('[', '-')
    name_log = name_log.replace(']', '-')
    # split the name into 100-character chunks, one nested directory per chunk
    name_log_list = list(map(''.join, zip(*[iter(name_log)] * 100)))
    print(name_log_list, '\n')
    folder_save = args.save_folder
    for i in range(len(name_log_list)):
        folder_save = os.path.join(folder_save, name_log_list[i])
        if not os.path.isdir(folder_save):
            os.mkdir(folder_save)
    print('This will be saved in: ' + folder_save, '\n')

    args.bottleneck_width = json.loads(args.bottleneck_width)
    args.bottleneck_depth = json.loads(args.bottleneck_depth)
    if args.distributed:
        torch.cuda.set_device(args.rank % torch.cuda.device_count())
    torch.cuda.set_device(args.gpu)

    global best_prec1
    global scat
    scat = Scattering(M=224, N=224, J=args.J, pre_pad=False).cuda()

    def save_checkpoint(state, is_best,
                        filename=os.path.join(folder_save, 'checkpoint.pth.tar')):
        torch.save(state, filename)
        if is_best:
            shutil.copyfile(filename,
                            os.path.join(folder_save, 'model_best.pth.tar'))

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size)

    # create model
    model = models.__dict__[args.arch](224, args.J,
                                       width=args.bottleneck_width,
                                       depth=args.bottleneck_depth,
                                       conv1x1=args.bottleneck_conv1x1)
    model.cuda()
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    print('Number of parameters: %d' % params)

    #### MODIFIED by Edouard
    save_checkpoint(
        {
            'epoch': -1,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_prec1': 0,
        }, False)
    if args.distributed:
        model = DDP(model)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            # checkpoint = torch.load(args.resume)
            checkpoint = torch.load(
                args.resume,
                map_location=lambda storage, loc: storage.cuda(args.gpu))
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)
    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(
            valdir,
            transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best)