def test(tmp_path, rep, save_freq):
    temp_dir = tmp_path / "test"

    @callbacks.metric_callback_decorator
    def ca(data):
        output, target = data["output"], data["data"][1]
        return {i: v for i, v in enumerate(metrics.classwise_accuracy(output, target))}

    model = nn.Linear(10, 10)
    optimizer = optim.SGD(lr=0.1)
    c = callbacks.CallbackList(callbacks.AccuracyCallback(), ca,
                               callbacks.WeightSave(save_path=temp_dir, save_freq=save_freq))
    epoch = range(1)
    loader = [(torch.randn(2, 10), torch.zeros(2, dtype=torch.long)) for _ in range(10)]
    with {"tqdm": lambda: reporters.TQDMReporter(epoch, c, temp_dir),
          "logger": lambda: reporters.LoggerReporter(c, temp_dir),
          "tensorboard": lambda: reporters.TensorboardReporter(c, temp_dir)}[rep]() as _rep:
        tr = trainers.SupervisedTrainer(model, optimizer, F.cross_entropy,
                                        callbacks=_rep, verb=False)
        if rep == "tqdm":
            epoch = _rep
        for _ in epoch:
            tr.train(loader)
            tr.test(loader)
        tr.exit()

    try:
        # .../test/**/0.pkl
        save_file = list(Path(temp_dir).glob("*/*.pkl"))[0]
    except IndexError as e:
        print(list(Path(temp_dir).glob("*/*")))
        raise e
    tr.resume(save_file)

    c = callbacks.AccuracyCallback()
    with {"tqdm": lambda: reporters.TQDMReporter(epoch, c, temp_dir),
          "logger": lambda: reporters.LoggerReporter(c, temp_dir),
          "tensorboard": lambda: reporters.TensorboardReporter(c, temp_dir)}[rep]() as _rep:
        inferencer = Inferencer(model, _rep)
        inferencer.load(save_file)
        inferencer.run(loader)
def test(rep):
    tmpdir = str(gettempdir())
    if rep == "tensorboard" and not is_tensorboardX_available:
        pytest.skip("tensorboardX is not available")

    @callbacks.metric_callback_decorator
    def ca(data):
        output, target = data["output"], data["data"][1]
        return {i: v for i, v in enumerate(metrics.classwise_accuracy(output, target))}

    model = nn.Linear(10, 10)
    optimizer = optim.SGD(lr=0.1)
    c = callbacks.CallbackList(callbacks.AccuracyCallback(), ca, callbacks.WeightSave(tmpdir))
    epoch = range(1)
    loader = [(torch.randn(2, 10), torch.zeros(2, dtype=torch.long)) for _ in range(10)]
    with {"tqdm": lambda: reporters.TQDMReporter(epoch, c, tmpdir),
          "logger": lambda: reporters.LoggerReporter(c, tmpdir),
          "tensorboard": lambda: reporters.TensorboardReporter(c, tmpdir)}[rep]() as _rep:
        tr = trainers.SupervisedTrainer(model, optimizer, F.cross_entropy,
                                        callbacks=_rep, verb=False)
        if rep == "tqdm":
            epoch = _rep
        for _ in epoch:
            tr.train(loader)
            tr.test(loader)

    save_file = list(Path(tmpdir).glob("*/*.pkl"))[0]
    tr.resume(save_file)

    c = callbacks.AccuracyCallback()
    with {"tqdm": lambda: reporters.TQDMReporter(epoch, c, tmpdir),
          "logger": lambda: reporters.LoggerReporter(c, tmpdir),
          "tensorboard": lambda: reporters.TensorboardReporter(c, tmpdir)}[rep]() as _rep:
        inferencer = Inferencer(model, _rep)
        inferencer.load(save_file)
        inferencer.run(loader)
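# A minimal sketch of how pytest could parametrize the tests above over the
# reporter backends. The decorator is standard pytest; the test name and the
# idea of delegating to a shared body are assumptions, not the original suite:
import pytest

@pytest.mark.parametrize("rep", ["tqdm", "logger", "tensorboard"])
def test_reporter_roundtrip(rep, tmp_path):
    ...  # train, save, resume, and run inference as in the bodies above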
def get_components(cfg):
    labeled_loader, unlabeled_loader, val_loader, test_loader = get_dataloader(
        cfg.data.name, cfg.data.labeled_size, cfg.data.unlabeled_size,
        cfg.data.val_size, cfg.data.batch_size, cfg.data.random_state,
        download=cfg.data.download, pilaugment=cfg.data.get('pilaugment', False))
    model = wrn28_2(num_classes=6 if cfg.data.name == "animal" else 10)
    optimizer = {'adam': optim.Adam(lr=cfg.optim.lr),
                 'sgd': optim.SGD(lr=cfg.optim.lr, momentum=0.9)}[cfg.optim.name]
    scheduler = {'adam': None,
                 'sgd': lr_scheduler.CosineAnnealingWithWarmup(cfg.optim.epochs, 4,
                                                               cfg.optim.epochs // 100)
                 }[cfg.optim.name]
    ema_model = partial(EMAModel, ema_rate=cfg.model.ema_rate,
                        weight_decay=cfg.optim.wd * cfg.optim.lr)
    num_classes = {"animal": 6, "cifar100": 100, "tinyimagenet": 200}.get(cfg.data.name, 10)
    tq = reporters.TQDMReporter(range(cfg.optim.epochs))
    _callbacks = [callbacks.AccuracyCallback(), callbacks.LossCallback(),
                  reporters.IOReporter("."), reporters.TensorboardReporter("."), tq]
    return (PackedLoader(labeled_loader, unlabeled_loader), val_loader, test_loader,
            model, optimizer, scheduler, ema_model, num_classes, tq, _callbacks)
def main(cfg: Config):
    if cfg.enable_accimage:
        enable_accimage()

    model = resnet50()
    optimizer = optim.SGD(lr=1e-1 * cfg.batch_size * get_num_nodes() / 256,
                          momentum=0.9, weight_decay=1e-4)
    scheduler = lr_scheduler.MultiStepLR([30, 60, 80])
    train_loader, test_loader = DATASET_REGISTRY("fast_imagenet" if cfg.use_fast_collate else "imagenet")(
        cfg.batch_size,
        train_size=cfg.batch_size * 50 if cfg.debug else None,
        test_size=cfg.batch_size * 50 if cfg.debug else None,
        num_workers=cfg.num_workers)

    use_multi_gpus = not is_distributed() and torch.cuda.device_count() > 1
    with SupervisedTrainer(model, optimizer, F.cross_entropy,
                           reporters=[reporters.TensorboardReporter(".")],
                           scheduler=scheduler,
                           data_parallel=use_multi_gpus,
                           use_amp=cfg.use_amp,
                           use_cuda_nonblocking=True,
                           use_sync_bn=cfg.use_sync_bn,
                           report_accuracy_topk=5) as trainer:
        for epoch in trainer.epoch_range(cfg.epochs):
            trainer.train(train_loader)
            trainer.test(test_loader)

    print(f"Max Test Accuracy={max(trainer.reporter.history('accuracy/test')):.3f}")
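# Note: the learning rate above follows the linear scaling rule for large-batch
# SGD (Goyal et al., 2017): the base LR of 0.1 is multiplied by
# (global batch size / 256), where the global batch size is the per-node batch
# size times the number of nodes.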
def train_and_eval(cfg):
    train_loader, val_loader, test_loader, num_classes = get_dataloader(
        cfg.data.name, cfg.data.val_size, cfg.data.batch_size,
        cfg.data.download, cfg.augment, False)
    model = get_model(cfg.model.name, num_classes)
    optimizer = optim.SGD(cfg.optim.model.lr, momentum=0.9,
                          weight_decay=cfg.optim.model.weight_decay)
    scheduler = lr_scheduler.MultiStepLR(cfg.optim.model.steps)
    tq = reporters.TQDMReporter(range(cfg.optim.epochs), verb=cfg.verb)
    callback = [callbacks.AccuracyCallback(), callbacks.LossCallback(),
                reporters.TensorboardReporter("."), reporters.IOReporter("."), tq]

    with trainers.SupervisedTrainer(model, optimizer, F.cross_entropy,
                                    callbacks=callback, scheduler=scheduler) as trainer:
        for ep in tq:
            trainer.train(train_loader)
            trainer.test(val_loader, 'val')
            trainer.test(test_loader)
def main():
    model = {"resnet20": resnet20, "wrn28_10": wrn28_10}[args.model](num_classes=10)
    weight_decay = {"resnet20": 1e-4, "wrn28_10": 5e-4}[args.model]
    lr_decay = {"resnet20": 0.1, "wrn28_10": 0.2}[args.model]
    train_loader, test_loader = cifar10_loaders(args.batch_size)
    optimizer = optim.SGD(lr=1e-1, momentum=0.9, weight_decay=weight_decay)
    scheduler = lr_scheduler.MultiStepLR([100, 150], gamma=lr_decay)
    tq = reporters.TQDMReporter(range(args.epochs), verb=True)
    c = [callbacks.AccuracyCallback(), callbacks.LossCallback(),
         reporters.IOReporter("."), reporters.TensorboardReporter("."),
         callbacks.WeightSave("."), tq]

    with trainers.SupervisedTrainer(model, optimizer, F.cross_entropy,
                                    callbacks=c, scheduler=scheduler) as trainer:
        for _ in tq:
            trainer.train(train_loader)
            trainer.test(test_loader)
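# A hypothetical sketch of the argument parsing this script assumes; the flag
# names mirror the `args` attributes read above, but the defaults are guesses:
if __name__ == "__main__":
    import argparse

    p = argparse.ArgumentParser()
    p.add_argument("--model", choices=["resnet20", "wrn28_10"], default="resnet20")
    p.add_argument("--batch_size", type=int, default=128)
    p.add_argument("--epochs", type=int, default=200)
    args = p.parse_args()
    main()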
def main():
    model = se_resnet50(num_classes=1000)
    optimizer = optim.SGD(lr=0.6 / 1024 * args.batch_size, momentum=0.9, weight_decay=1e-4)
    scheduler = lr_scheduler.MultiStepLR([50, 70])
    c = [callbacks.AccuracyCallback(), callbacks.LossCallback()]
    r = reporters.TQDMReporter(range(args.epochs), callbacks=c)
    tb = reporters.TensorboardReporter(c)
    rep = callbacks.CallbackList(r, tb, callbacks.WeightSave("checkpoints"))

    if args.distributed:
        # DistributedSupervisedTrainer sets up torch.distributed
        if args.local_rank == 0:
            print("\nuse DistributedDataParallel")
        trainer = DistributedSupervisedTrainer(model, optimizer, F.cross_entropy,
                                               callbacks=rep, scheduler=scheduler,
                                               init_method=args.init_method,
                                               backend=args.backend)
    else:
        multi_gpus = torch.cuda.device_count() > 1
        if multi_gpus:
            print("\nuse DataParallel")
        trainer = SupervisedTrainer(model, optimizer, F.cross_entropy,
                                    callbacks=rep, scheduler=scheduler,
                                    data_parallel=multi_gpus)

    # if distributed, need to setup loaders after DistributedSupervisedTrainer
    train_loader, test_loader = imagenet_loaders(
        args.root, args.batch_size, distributed=args.distributed,
        num_train_samples=args.batch_size * 10 if args.debug else None,
        num_test_samples=args.batch_size * 10 if args.debug else None)

    for _ in r:
        trainer.train(train_loader)
        trainer.test(test_loader)
def main():
    if is_distributed():
        init_distributed()

    model = se_resnet50(num_classes=1000)
    optimizer = optim.SGD(lr=0.6 / 1024 * args.batch_size, momentum=0.9, weight_decay=1e-4)
    scheduler = lr_scheduler.MultiStepLR([50, 70])
    train_loader, test_loader = DATASET_REGISTRY("imagenet")(args.batch_size)
    c = [callbacks.AccuracyCallback(),
         callbacks.AccuracyCallback(k=5),
         callbacks.LossCallback(),
         callbacks.WeightSave("."),
         reporters.TensorboardReporter("."),
         reporters.TQDMReporter(range(args.epochs))]

    with SupervisedTrainer(model, optimizer, F.cross_entropy,
                           callbacks=c, scheduler=scheduler) as trainer:
        for _ in c[-1]:
            trainer.train(train_loader)
            trainer.test(test_loader)
def main():
    if args.distributed:
        init_distributed()

    model = se_resnet50(num_classes=1000)
    optimizer = optim.SGD(lr=0.6 / 1024 * args.batch_size, momentum=0.9, weight_decay=1e-4)
    scheduler = lr_scheduler.MultiStepLR([50, 70])
    train_loader, test_loader = imagenet_loaders(
        args.root, args.batch_size, distributed=args.distributed,
        num_train_samples=args.batch_size * 10 if args.debug else None,
        num_test_samples=args.batch_size * 10 if args.debug else None)
    c = [callbacks.AccuracyCallback(),
         callbacks.AccuracyCallback(k=5),
         callbacks.LossCallback(),
         callbacks.WeightSave('.'),
         reporters.TensorboardReporter('.'),
         reporters.TQDMReporter(range(args.epochs))]

    with SupervisedTrainer(model, optimizer, F.cross_entropy,
                           callbacks=c, scheduler=scheduler) as trainer:
        for _ in c[-1]:
            trainer.train(train_loader)
            trainer.test(test_loader)
def main(cfg):
    if cfg.use_accimage:
        enable_accimage()

    model = MODEL_REGISTRY(cfg.model.name)(num_classes=10)
    train_loader, test_loader = DATASET_REGISTRY("fast_cifar10" if cfg.use_fast_collate else "cifar10")(
        cfg.data.batch_size, num_workers=4, use_prefetcher=cfg.use_prefetcher)
    optimizer = None if cfg.bn_no_wd else optim.SGD(lr=1e-1, momentum=0.9,
                                                    weight_decay=cfg.optim.weight_decay)
    scheduler = lr_scheduler.MultiStepLR([100, 150], gamma=cfg.optim.lr_decay)

    if cfg.bn_no_wd:
        def set_optimizer(trainer):
            bn_params = []
            non_bn_parameters = []
            for name, p in trainer.model.named_parameters():
                if "bn" in name:
                    bn_params.append(p)
                else:
                    non_bn_parameters.append(p)
            optim_params = [
                {"params": bn_params, "weight_decay": 0},
                {"params": non_bn_parameters, "weight_decay": cfg.optim.weight_decay},
            ]
            trainer.optimizer = torch.optim.SGD(optim_params, lr=1e-1, momentum=0.9)

        trainers.SupervisedTrainer.set_optimizer = set_optimizer

    if cfg.use_zerograd_none:
        import types

        def set_optimizer(trainer):
            # see Apex for details
            def zero_grad(self):
                for group in self.param_groups:
                    for p in group['params']:
                        p.grad = None

            trainer.optimizer = trainer.optimizer(trainer.model.parameters())
            trainer.optimizer.zero_grad = types.MethodType(zero_grad, trainer.optimizer)

        trainers.SupervisedTrainer.set_optimizer = set_optimizer

    with trainers.SupervisedTrainer(model, optimizer, F.cross_entropy,
                                    reporters=[reporters.TensorboardReporter('.')],
                                    scheduler=scheduler,
                                    use_amp=cfg.use_amp,
                                    debug=cfg.debug) as trainer:
        for _ in trainer.epoch_range(cfg.optim.epochs):
            trainer.train(train_loader)
            trainer.test(test_loader)

    print(f"Max Test Accuracy={max(trainer.reporter.history('accuracy/test')):.3f}")
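# Note on the `zero_grad` override above: assigning `p.grad = None` instead of
# zeroing gradients in place frees the gradient buffers and skips a fill kernel
# per step; the same behavior later became available in stock PyTorch as
# `optimizer.zero_grad(set_to_none=True)`.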
def main(cfg):
    model = {"resnet20": resnet20, "wrn28_10": wrn28_10}[cfg.model](num_classes=10)
    weight_decay = {"resnet20": 1e-4, "wrn28_10": 5e-4}[cfg.model]
    lr_decay = {"resnet20": 0.1, "wrn28_10": 0.2}[cfg.model]
    train_loader, test_loader = vision_loaders("cifar10", cfg.batch_size)
    optimizer = None if cfg.bn_no_wd else optim.SGD(lr=1e-1, momentum=0.9,
                                                    weight_decay=weight_decay)
    scheduler = lr_scheduler.MultiStepLR([100, 150], gamma=lr_decay)
    tq = reporters.TQDMReporter(range(cfg.epochs), verb=True)
    c = [callbacks.AccuracyCallback(), callbacks.LossCallback(),
         reporters.IOReporter("."), reporters.TensorboardReporter("."),
         callbacks.WeightSave("."), tq]

    if cfg.bn_no_wd:
        def set_optimizer(trainer):
            bn_params = []
            non_bn_parameters = []
            for name, p in trainer.model.named_parameters():
                if "bn" in name:
                    bn_params.append(p)
                else:
                    non_bn_parameters.append(p)
            optim_params = [
                {"params": bn_params, "weight_decay": 0},
                {"params": non_bn_parameters, "weight_decay": weight_decay},
            ]
            trainer.optimizer = torch.optim.SGD(optim_params, lr=1e-1, momentum=0.9)

        trainers.SupervisedTrainer.set_optimizer = set_optimizer

    with trainers.SupervisedTrainer(model, optimizer, F.cross_entropy,
                                    callbacks=c, scheduler=scheduler) as trainer:
        for _ in tq:
            trainer.train(train_loader)
            trainer.test(test_loader)
def main():
    if args.distributed:
        init_distributed()
    if args.enable_accimage:
        enable_accimage()

    model = resnet50()
    optimizer = optim.SGD(lr=1e-1 * args.batch_size * get_num_nodes() / 256,
                          momentum=0.9, weight_decay=1e-4)
    scheduler = lr_scheduler.MultiStepLR([30, 60, 80])
    c = [callbacks.AccuracyCallback(), callbacks.LossCallback()]
    r = reporters.TQDMReporter(range(args.epochs), callbacks=c)
    tb = reporters.TensorboardReporter(c)
    rep = callbacks.CallbackList(r, tb, callbacks.WeightSave("checkpoints"))
    _train_loader, _test_loader = imagenet_loaders(
        args.root, args.batch_size, distributed=args.distributed,
        num_train_samples=args.batch_size * 10 if args.debug else None,
        num_test_samples=args.batch_size * 10 if args.debug else None)

    if args.distributed:
        # DistributedSupervisedTrainer sets up torch.distributed
        if args.local_rank == 0:
            print("\nuse DistributedDataParallel\n")
        trainer = DistributedSupervisedTrainer(model, optimizer, F.cross_entropy,
                                               callbacks=rep, scheduler=scheduler,
                                               init_method=args.init_method,
                                               backend=args.backend,
                                               enable_amp=args.enable_amp)
    else:
        use_multi_gpus = torch.cuda.device_count() > 1
        if use_multi_gpus:
            print("\nuse DataParallel\n")
        trainer = SupervisedTrainer(model, optimizer, F.cross_entropy,
                                    callbacks=rep, data_parallel=use_multi_gpus)

    for epoch in r:
        if args.use_prefetcher:
            train_loader = prefetcher.DataPrefetcher(_train_loader)
            test_loader = prefetcher.DataPrefetcher(_test_loader)
        else:
            train_loader, test_loader = _train_loader, _test_loader
        # following apex's training scheme
        trainer.train(train_loader)
        trainer.test(test_loader)
    rep.close()
def main(cfg):
    if cfg.use_accimage:
        enable_accimage()

    model = MODEL_REGISTRY(cfg.name)(num_classes=10)
    train_loader, test_loader = DATASET_REGISTRY("fast_cifar10" if cfg.use_fast_collate else "cifar10")(
        cfg.batch_size, num_workers=4, use_prefetcher=cfg.use_prefetcher)
    optimizer = None if cfg.bn_no_wd else optim.SGD(lr=cfg.lr, momentum=0.9,
                                                    weight_decay=cfg.weight_decay)
    scheduler = lr_scheduler.CosineAnnealingWithWarmup(cfg.epochs, 4, 5)

    if cfg.bn_no_wd:
        def set_optimizer(trainer):
            bn_params = []
            non_bn_parameters = []
            for name, p in trainer.model.named_parameters():
                if "bn" in name:
                    bn_params.append(p)
                else:
                    non_bn_parameters.append(p)
            optim_params = [
                {"params": bn_params, "weight_decay": 0},
                {"params": non_bn_parameters, "weight_decay": cfg.weight_decay},
            ]
            trainer.optimizer = torch.optim.SGD(optim_params, lr=1e-1, momentum=0.9)

        trainers.SupervisedTrainer.set_optimizer = set_optimizer

    with trainers.SupervisedTrainer(model, optimizer, F.cross_entropy,
                                    reporters=[reporters.TensorboardReporter('.')],
                                    scheduler=scheduler,
                                    use_amp=cfg.use_amp,
                                    debug=cfg.debug) as trainer:
        for _ in trainer.epoch_range(cfg.epochs):
            trainer.train(train_loader)
            trainer.test(test_loader)
            trainer.scheduler.step()

    print(f"Max Test Accuracy={max(trainer.reporter.history('accuracy/test')):.3f}")
def main(cfg: Config):
    if cfg.gpu is not None:
        torch.cuda.set_device(cfg.gpu)
    if homura.is_master():
        import rich
        rich.print(cfg)

    vs = DATASET_REGISTRY("imagenet")
    vs.collate_fn = fast_collate if cfg.data.mixup == 0 else gen_mixup_collate(cfg.data.mixup)
    model = MLPMixers(cfg.model.name)(num_classes=1_000, droppath_rate=cfg.model.droppath_rate)
    train_da = vs.default_train_da.copy()
    if cfg.data.autoaugment:
        train_da.append(AutoAugment())
    post_da = [RandomErasing()] if cfg.data.random_erasing else None
    train_loader, test_loader = vs(batch_size=cfg.data.batch_size,
                                   train_da=train_da,
                                   post_norm_train_da=post_da,
                                   train_size=cfg.data.batch_size * 50 if cfg.debug else None,
                                   test_size=cfg.data.batch_size * 50 if cfg.debug else None,
                                   num_workers=12)
    optimizer = homura.optim.AdamW(cfg.optim.lr, weight_decay=cfg.optim.weight_decay,
                                   multi_tensor=True)
    scheduler = homura.lr_scheduler.CosineAnnealingWithWarmup(cfg.optim.epochs,
                                                              multiplier=cfg.optim.multiplier,
                                                              warmup_epochs=cfg.optim.warmup_epochs,
                                                              min_lr=cfg.optim.min_lr)

    with Trainer(model,
                 optimizer,
                 SmoothedCrossEntropy(cfg.optim.label_smoothing),
                 reporters=[reporters.TensorboardReporter(".")],
                 scheduler=scheduler,
                 use_amp=cfg.amp,
                 use_cuda_nonblocking=True,
                 report_accuracy_topk=5,
                 optim_cfg=cfg.optim,
                 debug=cfg.debug,
                 cfg=cfg.model) as trainer:
        for ep in trainer.epoch_range(cfg.optim.epochs):
            trainer.train(train_loader)
            trainer.test(test_loader)
            trainer.scheduler.step()
            if not cfg.no_save:
                trainer.save(f"outputs/{cfg.model.name}", f"{ep}")

    print(f"Max Test Accuracy={max(trainer.reporter.history('accuracy/test')):.3f}")
def main(cfg):
    if cfg.distributed.enable:
        init_distributed(use_horovod=cfg.distributed.use_horovod,
                         backend=cfg.distributed.backend,
                         init_method=cfg.distributed.init_method)
    if cfg.enable_accimage:
        enable_accimage()

    model = resnet50()
    optimizer = optim.SGD(lr=1e-1 * cfg.batch_size * get_num_nodes() / 256,
                          momentum=0.9, weight_decay=1e-4)
    scheduler = lr_scheduler.MultiStepLR([30, 60, 80])
    tq = reporters.TQDMReporter(range(cfg.epochs))
    c = [callbacks.AccuracyCallback(),
         callbacks.AccuracyCallback(k=5),
         callbacks.LossCallback(),
         tq,
         reporters.TensorboardReporter("."),
         reporters.IOReporter(".")]
    _train_loader, _test_loader = imagenet_loaders(
        cfg.root, cfg.batch_size, distributed=cfg.distributed.enable,
        num_train_samples=cfg.batch_size * 10 if cfg.debug else None,
        num_test_samples=cfg.batch_size * 10 if cfg.debug else None)
    use_multi_gpus = not cfg.distributed.enable and torch.cuda.device_count() > 1

    with SupervisedTrainer(model, optimizer, F.cross_entropy,
                           callbacks=c,
                           scheduler=scheduler,
                           data_parallel=use_multi_gpus,
                           use_horovod=cfg.distributed.use_horovod) as trainer:
        for epoch in tq:
            if cfg.use_prefetcher:
                train_loader = prefetcher.DataPrefetcher(_train_loader)
                test_loader = prefetcher.DataPrefetcher(_test_loader)
            else:
                train_loader, test_loader = _train_loader, _test_loader
            # following apex's training scheme
            trainer.train(train_loader)
            trainer.test(test_loader)
def main():
    Trainer = trainers.SupervisedTrainer if args.baseline else MixupTrainer
    model = MODELS[args.model](num_classes=NUMCLASSES[args.dataset])
    train_loader, test_loader = DATASETS[args.dataset](args.batch_size)
    optimizer = optim.SGD(lr=1e-1, momentum=0.9, weight_decay=1e-4)
    scheduler = lr_scheduler.MultiStepLR(args.steps, gamma=0.1)
    c = [callbacks.AccuracyCallback(), callbacks.LossCallback()]

    with reporters.TQDMReporter(range(args.epochs), callbacks=c) as tq, \
            reporters.TensorboardReporter(c) as tb:
        trainer = Trainer(model, optimizer, naive_cross_entropy_loss,
                          callbacks=[tq, tb], scheduler=scheduler,
                          alpha=args.alpha, num_classes=NUMCLASSES[args.dataset])
        for _ in tq:
            trainer.train(train_loader)
            trainer.test(test_loader)
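# Mixup produces soft (mixed) targets, so `naive_cross_entropy_loss` above must
# accept a probability distribution rather than class indices. A minimal sketch
# of such a loss, assuming targets of shape (N, num_classes); the original
# implementation may differ:
import torch
import torch.nn.functional as F

def naive_cross_entropy_loss(input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    # cross entropy against soft targets: -sum_c target_c * log p_c, averaged over the batch
    return -(target * F.log_softmax(input, dim=1)).sum(dim=1).mean()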
def main():
    model = MODELS[args.teacher_model](num_classes=10)
    train_loader, test_loader = cifar10_loaders(args.batch_size)

    weight_decay = 1e-4 if "resnet" in args.teacher_model else 5e-4
    lr_decay = 0.1 if "resnet" in args.teacher_model else 0.2
    optimizer = optim.SGD(lr=1e-1, momentum=0.9, weight_decay=weight_decay)
    scheduler = lr_scheduler.MultiStepLR([50, 80], gamma=lr_decay)
    trainer = trainers.SupervisedTrainer(model, optimizer, F.cross_entropy,
                                         scheduler=scheduler)
    trainer.logger.info("Train the teacher model!")
    for _ in trange(args.teacher_epochs, ncols=80):
        trainer.train(train_loader)
        trainer.test(test_loader)
    teacher_model = model.eval()

    weight_decay = 1e-4 if "resnet" in args.student_model else 5e-4
    lr_decay = 0.1 if "resnet" in args.student_model else 0.2
    optimizer = optim.SGD(lr=1e-1, momentum=0.9, weight_decay=weight_decay)
    scheduler = lr_scheduler.MultiStepLR([50, 80], gamma=lr_decay)
    model = MODELS[args.student_model](num_classes=10)
    c = [callbacks.AccuracyCallback(), callbacks.LossCallback(), kl_loss]

    with reporters.TQDMReporter(range(args.student_epochs), callbacks=c) as tq, \
            reporters.TensorboardReporter(c) as tb:
        trainer = DistillationTrainer(model, optimizer, F.cross_entropy,
                                      callbacks=[tq, tb], scheduler=scheduler,
                                      teacher_model=teacher_model,
                                      temperature=args.temperature)
        trainer.logger.info("Train the student model!")
        for _ in tq:
            trainer.train(train_loader)
            trainer.test(test_loader)
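# For reference, a hypothetical sketch of the temperature-softened KL
# divergence typically used in distillation (Hinton et al., 2015); the name
# and signature below are assumptions, not this repo's `kl_loss`:
import torch.nn.functional as F

def softened_kl(student_logits, teacher_logits, temperature: float = 4.0):
    log_p = F.log_softmax(student_logits / temperature, dim=1)
    q = F.softmax(teacher_logits / temperature, dim=1)
    # scale by T^2 to keep gradient magnitudes comparable across temperatures
    return F.kl_div(log_p, q, reduction="batchmean") * temperature ** 2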
def _main(cfg):
    model = MODEL_REGISTRY(cfg.model)(num_classes=10)
    train_loader, test_loader = DATASET_REGISTRY("cifar10")(cfg.batch_size,
                                                            num_workers=4,
                                                            download=True)
    optimizer = homura.optim.SGD(lr=cfg.optim.lr, momentum=0.9,
                                 weight_decay=cfg.optim.weight_decay)
    scheduler = {
        "cosine": lr_scheduler.CosineAnnealingWithWarmup(cfg.optim.epochs, 4, 5),
        "abel": ABEL(cfg.optim.gamma),
        "steps": lr_scheduler.MultiStepLR(milestones=cfg.optim.steps, gamma=cfg.optim.gamma),
    }[cfg.optim.name]

    with Trainer(model,
                 optimizer,
                 F.cross_entropy,
                 reporters=[reporters.TensorboardReporter(".")],
                 scheduler=scheduler,
                 use_amp=cfg.use_amp,
                 ) as trainer:
        for _ in trainer.epoch_range(cfg.optim.epochs):
            trainer.train(train_loader)
            trainer.test(test_loader)
            trainer.scheduler.step()

    print(f"Max Test Accuracy={max(trainer.reporter.history('accuracy/test')):.3f}")
def main(cfg):
    if cfg.use_accimage:
        enable_accimage()

    data = DATASET_REGISTRY(cfg.data).setup(cfg.batch_size,
                                            num_workers=4,
                                            download=cfg.download,
                                            prefetch_factor=cfg.prefetch_factor,
                                            persistent_workers=cfg.persistent_workers)
    model = MODEL_REGISTRY(cfg.model)(num_classes=data.num_classes)
    optimizer = None if cfg.bn_no_wd else optim.SGD(lr=cfg.lr, momentum=0.9,
                                                    weight_decay=cfg.weight_decay,
                                                    multi_tensor=cfg.use_multi_tensor)
    scheduler = lr_scheduler.CosineAnnealingWithWarmup(cfg.epochs, 4, 5)

    if cfg.bn_no_wd:
        def set_optimizer(trainer):
            bn_params = []
            non_bn_parameters = []
            for name, p in trainer.model.named_parameters():
                if "norm" in name:
                    bn_params.append(p)
                else:
                    non_bn_parameters.append(p)
            optim_params = [
                {"params": bn_params, "weight_decay": 0},
                {"params": non_bn_parameters, "weight_decay": cfg.weight_decay},
            ]
            trainer.optimizer = torch.optim.SGD(optim_params, lr=1e-1, momentum=0.9)

        trainers.SupervisedTrainer.set_optimizer = set_optimizer

    with trainers.SupervisedTrainer(model, optimizer, F.cross_entropy,
                                    reporters=[reporters.TensorboardReporter('.')],
                                    scheduler=scheduler,
                                    use_amp=cfg.use_amp,
                                    use_channel_last=cfg.use_channel_last,
                                    debug=cfg.debug) as trainer:
        for _ in trainer.epoch_range(cfg.epochs):
            trainer.train(data.train_loader)
            trainer.test(data.test_loader)
            trainer.scheduler.step()

    print(f"Max Test Accuracy={max(trainer.reporter.history('accuracy/test')):.3f}")