def test_basic_trainer():
    model = nn.Linear(10, 10)
    optimizer = optim.SGD(lr=1e-1)
    scheduler = lr_scheduler.StepLR(9)
    trainer = trainers.SupervisedTrainer(model, optimizer, F.cross_entropy,
                                         scheduler=scheduler, update_scheduler_by_epoch=False)
    loader = [(torch.randn(2, 10), torch.zeros(2, dtype=torch.long)) for _ in range(10)]
    for _ in trainer.epoch_range(1):
        trainer.train(loader)
    # StepLR(9) has decayed lr=0.1 once by the default gamma=0.1 within the 10 steps
    assert trainer.optimizer.param_groups[0]["lr"] == pytest.approx(0.01)

    optimizer = torch.optim.SGD(model.parameters(), lr=1e-1)
    trainer = trainers.SupervisedTrainer(model, optimizer, F.cross_entropy,
                                         scheduler=scheduler, update_scheduler_by_epoch=False)
    for _ in trainer.epoch_range(1):
        trainer.train(loader)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 9)
    trainer = trainers.SupervisedTrainer(model, optimizer, F.cross_entropy,
                                         scheduler=scheduler, update_scheduler_by_epoch=False)
    trainer.run(loader, loader, 15, 11)
    assert trainer.step == 11 - 1
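
# Note: a bare `assert pytest.approx(x, tol)` is always truthy, because the
# approx object itself becomes the assertion target; the comparison only
# happens when approx sits on one side of `==` (hence the fix above). A
# minimal sketch of the intended idiom, plain pytest, nothing homura-specific:
def _approx_idiom_sketch():
    import pytest
    lr = 0.1 * 0.1  # lr=0.1 after one StepLR decay with the default gamma=0.1
    assert lr == pytest.approx(0.01)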
def main():
    model = {"resnet20": resnet20,
             "wrn28_10": wrn28_10}[args.model](num_classes=10)
    weight_decay = {"resnet20": 1e-4,
                    "wrn28_10": 5e-4}[args.model]
    lr_decay = {"resnet20": 0.1,
                "wrn28_10": 0.2}[args.model]
    train_loader, test_loader = cifar10_loaders(args.batch_size)
    optimizer = optim.SGD(lr=1e-1, momentum=0.9, weight_decay=weight_decay)
    scheduler = lr_scheduler.MultiStepLR([100, 150], gamma=lr_decay)
    tq = reporters.TQDMReporter(range(args.epochs), verb=True)
    c = [callbacks.AccuracyCallback(),
         callbacks.LossCallback(),
         reporters.IOReporter("."),
         reporters.TensorboardReporter("."),
         callbacks.WeightSave("."),
         tq]

    with trainers.SupervisedTrainer(model, optimizer, F.cross_entropy,
                                    callbacks=c, scheduler=scheduler) as trainer:
        for _ in tq:
            trainer.train(train_loader)
            trainer.test(test_loader)
def train_and_eval(cfg):
    train_loader, val_loader, test_loader, num_classes = get_dataloader(
        cfg.data.name, cfg.data.val_size, cfg.data.batch_size,
        cfg.data.download, cfg.augment, False)
    model = get_model(cfg.model.name, num_classes)
    optimizer = optim.SGD(cfg.optim.model.lr, momentum=0.9,
                          weight_decay=cfg.optim.model.weight_decay)
    scheduler = lr_scheduler.MultiStepLR(cfg.optim.model.steps)
    tq = reporters.TQDMReporter(range(cfg.optim.epochs), verb=cfg.verb)
    callback = [callbacks.AccuracyCallback(),
                callbacks.LossCallback(),
                reporters.TensorboardReporter("."),
                reporters.IOReporter("."),
                tq]

    with trainers.SupervisedTrainer(model, optimizer, F.cross_entropy,
                                    callbacks=callback, scheduler=scheduler) as trainer:
        for ep in tq:
            trainer.train(train_loader)
            trainer.test(val_loader, 'val')
        trainer.test(test_loader)
def main(cfg):
    if cfg.use_accimage:
        enable_accimage()
    model = MODEL_REGISTRY(cfg.model.name)(num_classes=10)
    train_loader, test_loader = DATASET_REGISTRY(
        "fast_cifar10" if cfg.use_fast_collate else "cifar10")(
        cfg.data.batch_size, num_workers=4, use_prefetcher=cfg.use_prefetcher)
    optimizer = None if cfg.bn_no_wd else optim.SGD(
        lr=1e-1, momentum=0.9, weight_decay=cfg.optim.weight_decay)
    scheduler = lr_scheduler.MultiStepLR([100, 150], gamma=cfg.optim.lr_decay)

    if cfg.bn_no_wd:
        # exclude BatchNorm parameters from weight decay by putting them
        # into a separate parameter group with weight_decay=0
        def set_optimizer(trainer):
            bn_params = []
            non_bn_parameters = []
            for name, p in trainer.model.named_parameters():
                if "bn" in name:
                    bn_params.append(p)
                else:
                    non_bn_parameters.append(p)
            optim_params = [
                {"params": bn_params, "weight_decay": 0},
                {"params": non_bn_parameters, "weight_decay": cfg.optim.weight_decay},
            ]
            trainer.optimizer = torch.optim.SGD(optim_params, lr=1e-1, momentum=0.9)

        trainers.SupervisedTrainer.set_optimizer = set_optimizer

    if cfg.use_zerograd_none:
        import types

        def set_optimizer(trainer):
            # see Apex for details
            def zero_grad(self):
                for group in self.param_groups:
                    for p in group['params']:
                        p.grad = None

            trainer.optimizer = trainer.optimizer(trainer.model.parameters())
            trainer.optimizer.zero_grad = types.MethodType(zero_grad, trainer.optimizer)

        trainers.SupervisedTrainer.set_optimizer = set_optimizer

    with trainers.SupervisedTrainer(model, optimizer, F.cross_entropy,
                                    reporters=[reporters.TensorboardReporter('.')],
                                    scheduler=scheduler,
                                    use_amp=cfg.use_amp,
                                    debug=cfg.debug) as trainer:
        for _ in trainer.epoch_range(cfg.optim.epochs):
            trainer.train(train_loader)
            trainer.test(test_loader)

    print(f"Max Test Accuracy={max(trainer.reporter.history('accuracy/test')):.3f}")
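
# The `zero_grad` monkey-patch above sets gradients to None instead of filling
# them with zeros, skipping a memset per step. Recent PyTorch exposes the same
# behaviour directly; a minimal sketch in plain torch, independent of homura:
def _zero_grad_none_sketch():
    import torch
    model = torch.nn.Linear(4, 4)
    opt = torch.optim.SGD(model.parameters(), lr=0.1)
    model(torch.randn(2, 4)).sum().backward()
    opt.zero_grad(set_to_none=True)  # grads become None, as in the patch above
    assert all(p.grad is None for p in model.parameters())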
def main(cfg):
    model = {"resnet20": resnet20,
             "wrn28_10": wrn28_10}[cfg.model](num_classes=10)
    weight_decay = {"resnet20": 1e-4,
                    "wrn28_10": 5e-4}[cfg.model]
    lr_decay = {"resnet20": 0.1,
                "wrn28_10": 0.2}[cfg.model]
    train_loader, test_loader = vision_loaders("cifar10", cfg.batch_size)
    optimizer = None if cfg.bn_no_wd else optim.SGD(
        lr=1e-1, momentum=0.9, weight_decay=weight_decay)
    scheduler = lr_scheduler.MultiStepLR([100, 150], gamma=lr_decay)
    tq = reporters.TQDMReporter(range(cfg.epochs), verb=True)
    c = [callbacks.AccuracyCallback(),
         callbacks.LossCallback(),
         reporters.IOReporter("."),
         reporters.TensorboardReporter("."),
         callbacks.WeightSave("."),
         tq]

    if cfg.bn_no_wd:
        def set_optimizer(trainer):
            bn_params = []
            non_bn_parameters = []
            for name, p in trainer.model.named_parameters():
                if "bn" in name:
                    bn_params.append(p)
                else:
                    non_bn_parameters.append(p)
            optim_params = [
                {"params": bn_params, "weight_decay": 0},
                {"params": non_bn_parameters, "weight_decay": weight_decay},
            ]
            trainer.optimizer = torch.optim.SGD(optim_params, lr=1e-1, momentum=0.9)

        trainers.SupervisedTrainer.set_optimizer = set_optimizer

    with trainers.SupervisedTrainer(model, optimizer, F.cross_entropy,
                                    callbacks=c, scheduler=scheduler) as trainer:
        for _ in tq:
            trainer.train(train_loader)
            trainer.test(test_loader)
def main(cfg):
    if cfg.use_accimage:
        enable_accimage()
    model = MODEL_REGISTRY(cfg.name)(num_classes=10)
    train_loader, test_loader = DATASET_REGISTRY(
        "fast_cifar10" if cfg.use_fast_collate else "cifar10")(
        cfg.batch_size, num_workers=4, use_prefetcher=cfg.use_prefetcher)
    optimizer = None if cfg.bn_no_wd else optim.SGD(
        lr=cfg.lr, momentum=0.9, weight_decay=cfg.weight_decay)
    scheduler = lr_scheduler.CosineAnnealingWithWarmup(cfg.epochs, 4, 5)

    if cfg.bn_no_wd:
        def set_optimizer(trainer):
            bn_params = []
            non_bn_parameters = []
            for name, p in trainer.model.named_parameters():
                if "bn" in name:
                    bn_params.append(p)
                else:
                    non_bn_parameters.append(p)
            optim_params = [
                {"params": bn_params, "weight_decay": 0},
                {"params": non_bn_parameters, "weight_decay": cfg.weight_decay},
            ]
            trainer.optimizer = torch.optim.SGD(optim_params, lr=1e-1, momentum=0.9)

        trainers.SupervisedTrainer.set_optimizer = set_optimizer

    with trainers.SupervisedTrainer(model, optimizer, F.cross_entropy,
                                    reporters=[reporters.TensorboardReporter('.')],
                                    scheduler=scheduler,
                                    use_amp=cfg.use_amp,
                                    debug=cfg.debug) as trainer:
        for _ in trainer.epoch_range(cfg.epochs):
            trainer.train(train_loader)
            trainer.test(test_loader)
            trainer.scheduler.step()

    print(f"Max Test Accuracy={max(trainer.reporter.history('accuracy/test')):.3f}")
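
# `CosineAnnealingWithWarmup(cfg.epochs, 4, 5)` is homura's scheduler; the
# meaning of its positional arguments is not shown here. Assuming the usual
# shape of such schedules (linear warmup, then cosine decay), a rough
# plain-torch equivalent would look like this sketch (illustrative values,
# not homura's API):
def _warmup_cosine_sketch(epochs=200, warmup_epochs=5):
    import torch
    model = torch.nn.Linear(4, 4)
    opt = torch.optim.SGD(model.parameters(), lr=0.1)
    sched = torch.optim.lr_scheduler.SequentialLR(
        opt,
        schedulers=[
            torch.optim.lr_scheduler.LinearLR(opt, start_factor=0.1,
                                              total_iters=warmup_epochs),
            torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=epochs - warmup_epochs),
        ],
        milestones=[warmup_epochs],  # switch from warmup to cosine here
    )
    for _ in range(epochs):
        # ... one epoch of training ...
        sched.step()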
def test(tmp_path, rep, save_freq):
    temp_dir = tmp_path / "test"

    @callbacks.metric_callback_decorator
    def ca(data):
        output, target = data["output"], data["data"][1]
        return {i: v for i, v in enumerate(metrics.classwise_accuracy(output, target))}

    model = nn.Linear(10, 10)
    optimizer = optim.SGD(lr=0.1)
    c = callbacks.CallbackList(callbacks.AccuracyCallback(), ca,
                               callbacks.WeightSave(save_path=temp_dir, save_freq=save_freq))
    epoch = range(1)
    loader = [(torch.randn(2, 10), torch.zeros(2, dtype=torch.long)) for _ in range(10)]

    with {"tqdm": lambda: reporters.TQDMReporter(epoch, c, temp_dir),
          "logger": lambda: reporters.LoggerReporter(c, temp_dir),
          "tensorboard": lambda: reporters.TensorboardReporter(c, temp_dir)}[rep]() as _rep:
        tr = trainers.SupervisedTrainer(model, optimizer, F.cross_entropy,
                                        callbacks=_rep, verb=False)
        if rep == "tqdm":
            epoch = _rep
        for _ in epoch:
            tr.train(loader)
            tr.test(loader)
        tr.exit()

    try:
        # .../test/**/0.pkl
        save_file = list(Path(temp_dir).glob("*/*.pkl"))[0]
    except IndexError as e:
        print(list(Path(temp_dir).glob("*/*")))
        raise e
    tr.resume(save_file)

    c = callbacks.AccuracyCallback()
    with {"tqdm": lambda: reporters.TQDMReporter(epoch, c, temp_dir),
          "logger": lambda: reporters.LoggerReporter(c, temp_dir),
          "tensorboard": lambda: reporters.TensorboardReporter(c, temp_dir)}[rep]() as _rep:
        inferencer = Inferencer(model, _rep)
        inferencer.load(save_file)
        inferencer.run(loader)
def test(rep):
    tmpdir = str(gettempdir())
    if rep == "tensorboard" and not is_tensorboardX_available:
        pytest.skip("tensorboardX is not available")

    @callbacks.metric_callback_decorator
    def ca(data):
        output, target = data["output"], data["data"][1]
        return {i: v for i, v in enumerate(metrics.classwise_accuracy(output, target))}

    model = nn.Linear(10, 10)
    optimizer = optim.SGD(lr=0.1)
    c = callbacks.CallbackList(callbacks.AccuracyCallback(), ca,
                               callbacks.WeightSave(tmpdir))
    epoch = range(1)
    loader = [(torch.randn(2, 10), torch.zeros(2, dtype=torch.long)) for _ in range(10)]

    with {"tqdm": lambda: reporters.TQDMReporter(epoch, c, tmpdir),
          "logger": lambda: reporters.LoggerReporter(c, tmpdir),
          "tensorboard": lambda: reporters.TensorboardReporter(c, tmpdir)}[rep]() as _rep:
        tr = trainers.SupervisedTrainer(model, optimizer, F.cross_entropy,
                                        callbacks=_rep, verb=False)
        if rep == "tqdm":
            epoch = _rep
        for _ in epoch:
            tr.train(loader)
            tr.test(loader)

    save_file = list(Path(tmpdir).glob("*/*.pkl"))[0]
    tr.resume(save_file)

    c = callbacks.AccuracyCallback()
    with {"tqdm": lambda: reporters.TQDMReporter(epoch, c, tmpdir),
          "logger": lambda: reporters.LoggerReporter(c, tmpdir),
          "tensorboard": lambda: reporters.TensorboardReporter(c, tmpdir)}[rep]() as _rep:
        inferencer = Inferencer(model, _rep)
        inferencer.load(save_file)
        inferencer.run(loader)
def test_update_scheduler():
    model = nn.Linear(10, 10)
    optimizer = optim.SGD(lr=0.1)
    trainer = trainers.SupervisedTrainer(model, optimizer, F.cross_entropy)
    trainer.update_scheduler(lr_scheduler.LambdaLR(lambda step: 0.1 ** step),
                             update_scheduler_by_epoch=False)
    loader = [(torch.randn(2, 10), torch.zeros(2, dtype=torch.long)) for _ in range(2)]
    trainer.train(loader)
    # the lambda calculates the factor multiplied onto the base lr, not the lr itself!
    assert list(trainer.optimizer.param_groups)[0]['lr'] == 0.1 ** 2

    trainer.update_scheduler(lr_scheduler.LambdaLR(lambda epoch: 0.1 ** epoch, last_epoch=1),
                             update_scheduler_by_epoch=True)
    trainer.train(loader)
    assert list(trainer.optimizer.param_groups)[0]['lr'] == 0.1 ** 3
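
# As the comment above says, `LambdaLR`'s lambda returns a multiplicative
# *factor* applied to the base lr (lr = base_lr * lambda(last_epoch)). A
# plain-torch sketch of that behaviour, independent of homura's wrappers:
def _lambda_lr_factor_sketch():
    import torch
    opt = torch.optim.SGD([torch.nn.Parameter(torch.zeros(1))], lr=0.1)
    sched = torch.optim.lr_scheduler.LambdaLR(opt, lambda step: 0.1 ** step)
    sched.step()  # last_epoch is now 1
    assert opt.param_groups[0]["lr"] == 0.1 * 0.1 ** 1  # base_lr * factor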
def main(batch_size):
    layers = ["layer1.0.conv1", "layer2.0.conv1", "layer3.0.conv1", "fc"]
    train_loader, test_loader = cifar10_loaders(128)
    weight_save = callbacks.WeightSave("checkpoints")
    model = resnet20(num_classes=10)
    model2 = deepcopy(model)
    optimizer = torch.optim.SGD(params=model.parameters(), lr=0.1,
                                momentum=0.9, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 50)
    trainer = trainers.SupervisedTrainer(model, optimizer, F.cross_entropy,
                                         scheduler=scheduler, callbacks=weight_save,
                                         verb=False)
    for ep in trange(100, ncols=80):
        trainer.train(train_loader)

    hooks1 = [CCAHook(model, name, svd_device=args.device) for name in layers]
    hooks2 = [CCAHook(model2, name, svd_device=args.device) for name in layers]
    device = next(model.parameters()).device
    model2.to(device)
    input = hooks1[0].data(train_loader.dataset, batch_size=batch_size).to(device)
    history = []

    def distance():
        model.eval()
        model2.eval()
        with torch.no_grad():
            model(input)
            model2(input)
        return [h1.distance(h2) for h1, h2 in zip(hooks1, hooks2)]

    # 0 and 99
    history.append(distance())
    # 29 and 99, ...
    for ep in (29, 49, 99):
        saved = torch.load(weight_save.save_path / f"{ep}.pkl")
        model2.load_state_dict(saved["model"])
        history.append(distance())
    plot(history, layers)
def main():
    model = MODELS[args.teacher_model](num_classes=10)
    train_loader, test_loader = cifar10_loaders(args.batch_size)
    weight_decay = 1e-4 if "resnet" in args.teacher_model else 5e-4
    lr_decay = 0.1 if "resnet" in args.teacher_model else 0.2
    optimizer = optim.SGD(lr=1e-1, momentum=0.9, weight_decay=weight_decay)
    scheduler = lr_scheduler.MultiStepLR([50, 80], gamma=lr_decay)
    trainer = trainers.SupervisedTrainer(model, optimizer, F.cross_entropy,
                                         scheduler=scheduler)
    trainer.logger.info("Train the teacher model!")
    for _ in trange(args.teacher_epochs, ncols=80):
        trainer.train(train_loader)
        trainer.test(test_loader)
    teacher_model = model.eval()

    weight_decay = 1e-4 if "resnet" in args.student_model else 5e-4
    lr_decay = 0.1 if "resnet" in args.student_model else 0.2
    optimizer = optim.SGD(lr=1e-1, momentum=0.9, weight_decay=weight_decay)
    scheduler = lr_scheduler.MultiStepLR([50, 80], gamma=lr_decay)
    model = MODELS[args.student_model](num_classes=10)
    c = [callbacks.AccuracyCallback(), callbacks.LossCallback(), kl_loss]

    with reporters.TQDMReporter(range(args.student_epochs), callbacks=c) as tq, \
            reporters.TensorboardReporter(c) as tb:
        trainer = DistillationTrainer(model, optimizer, F.cross_entropy,
                                      callbacks=[tq, tb], scheduler=scheduler,
                                      teacher_model=teacher_model,
                                      temperature=args.temperature)
        trainer.logger.info("Train the student model!")
        for _ in tq:
            trainer.train(train_loader)
            trainer.test(test_loader)
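
# The `kl_loss` callback used above is defined elsewhere in this example. A
# common Hinton-style soft-target distillation loss looks roughly like the
# sketch below (hypothetical helper; not necessarily what DistillationTrainer
# computes internally):
def _soft_target_loss_sketch(student_logits, teacher_logits, temperature=4.0):
    import torch.nn.functional as F
    # KL(teacher || student) on temperature-softened distributions, scaled by
    # T^2 so gradient magnitudes stay comparable across temperatures
    return F.kl_div(F.log_softmax(student_logits / temperature, dim=1),
                    F.softmax(teacher_logits / temperature, dim=1),
                    reduction="batchmean") * temperature ** 2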
def main(cfg):
    if cfg.use_accimage:
        enable_accimage()
    data = DATASET_REGISTRY(cfg.data).setup(
        cfg.batch_size, num_workers=4, download=cfg.download,
        prefetch_factor=cfg.prefetch_factor,
        persistent_workers=cfg.persistent_workers)
    model = MODEL_REGISTRY(cfg.model)(num_classes=data.num_classes)
    optimizer = None if cfg.bn_no_wd else optim.SGD(
        lr=cfg.lr, momentum=0.9, weight_decay=cfg.weight_decay,
        multi_tensor=cfg.use_multi_tensor)
    scheduler = lr_scheduler.CosineAnnealingWithWarmup(cfg.epochs, 4, 5)

    if cfg.bn_no_wd:
        def set_optimizer(trainer):
            bn_params = []
            non_bn_parameters = []
            for name, p in trainer.model.named_parameters():
                if "norm" in name:
                    bn_params.append(p)
                else:
                    non_bn_parameters.append(p)
            optim_params = [
                {"params": bn_params, "weight_decay": 0},
                {"params": non_bn_parameters, "weight_decay": cfg.weight_decay},
            ]
            trainer.optimizer = torch.optim.SGD(optim_params, lr=1e-1, momentum=0.9)

        trainers.SupervisedTrainer.set_optimizer = set_optimizer

    with trainers.SupervisedTrainer(model, optimizer, F.cross_entropy,
                                    reporters=[reporters.TensorboardReporter('.')],
                                    scheduler=scheduler,
                                    use_amp=cfg.use_amp,
                                    use_channel_last=cfg.use_channel_last,
                                    debug=cfg.debug) as trainer:
        for _ in trainer.epoch_range(cfg.epochs):
            trainer.train(data.train_loader)
            trainer.test(data.test_loader)
            trainer.scheduler.step()

    print(f"Max Test Accuracy={max(trainer.reporter.history('accuracy/test')):.3f}")
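
# `use_channel_last` presumably switches tensors to channels-last memory
# format, which can speed up convolutions under AMP on recent GPUs. A
# plain-torch sketch of that switch (illustrative, independent of homura):
def _channels_last_sketch():
    import torch
    model = torch.nn.Conv2d(3, 8, 3).to(memory_format=torch.channels_last)
    x = torch.randn(2, 3, 32, 32).to(memory_format=torch.channels_last)
    y = model(x)  # convolutions propagate the channels-last layout
    return y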