def test_warmup_multistep(self):
    p = nn.Parameter(torch.zeros(0))
    opt = torch.optim.SGD([p], lr=5)

    multiplier = WarmupParamScheduler(
        MultiStepParamScheduler(
            [1, 0.1, 0.01, 0.001],
            milestones=[10, 15, 20],
            num_updates=30,
        ),
        0.001,
        5 / 30,
    )
    sched = LRMultiplier(opt, multiplier, 30)
    # This is an equivalent of:
    # sched = WarmupMultiStepLR(
    #     opt, milestones=[10, 15, 20], gamma=0.1, warmup_factor=0.001, warmup_iters=5)

    p.sum().backward()
    opt.step()

    lrs = [0.005]
    for _ in range(30):
        sched.step()
        lrs.append(opt.param_groups[0]["lr"])
    self.assertTrue(np.allclose(lrs[:5], [0.005, 1.004, 2.003, 3.002, 4.001]))
    self.assertTrue(np.allclose(lrs[5:10], 5.0))
    self.assertTrue(np.allclose(lrs[10:15], 0.5))
    self.assertTrue(np.allclose(lrs[15:20], 0.05))
    self.assertTrue(np.allclose(lrs[20:], 0.005))
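# For reference, the composed multiplier can be evaluated on its own, without an
# optimizer or LRMultiplier. A minimal sketch (assumptions: the
# detectron2.solver / fvcore import paths used below, and that fvcore
# ParamSchedulers are called with the fraction of training completed, which is
# what LRMultiplier passes internally):
from fvcore.common.param_scheduler import MultiStepParamScheduler
from detectron2.solver import WarmupParamScheduler

mult = WarmupParamScheduler(
    MultiStepParamScheduler([1, 0.1, 0.01, 0.001], milestones=[10, 15, 20], num_updates=30),
    0.001,
    5 / 30,
)
base_lr = 5.0
# Should reproduce the values asserted in the test above: 0.005 at iteration 0,
# 5.0 after warmup, then 0.5 / 0.05 / 0.005 after milestones 10 / 15 / 20.
print([round(base_lr * mult(it / 30), 3) for it in range(30)])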
def test_warmup_cosine(self):
    p = nn.Parameter(torch.zeros(0))
    opt = torch.optim.SGD([p], lr=5)
    multiplier = CompositeParamScheduler(
        [
            LinearParamScheduler(0.001, 1),  # warmup
            CosineParamScheduler(1, 0),
        ],
        interval_scaling=["rescaled", "fixed"],
        lengths=[5 / 30, 25 / 30],
    )
    sched = LRMultiplier(opt, multiplier, 30)

    p.sum().backward()
    opt.step()
    self.assertEqual(opt.param_groups[0]["lr"], 0.005)
    lrs = [0.005]

    for _ in range(30):
        sched.step()
        lrs.append(opt.param_groups[0]["lr"])
    for idx, lr in enumerate(lrs):
        expected_cosine = 2.5 * (1.0 + math.cos(math.pi * idx / 30))
        if idx >= 5:
            self.assertAlmostEqual(lr, expected_cosine)
        else:
            self.assertNotAlmostEqual(lr, expected_cosine)
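# The interval_scaling argument controls whether each sub-scheduler sees a
# locally rescaled progress ("rescaled") or the global training fraction
# ("fixed"). A minimal sketch of the difference for the composite used above
# (assumption: fvcore ParamSchedulers are evaluated with the fraction of
# training completed):
import math
from fvcore.common.param_scheduler import (
    CompositeParamScheduler,
    CosineParamScheduler,
    LinearParamScheduler,
)

mult = CompositeParamScheduler(
    [LinearParamScheduler(0.001, 1), CosineParamScheduler(1, 0)],
    interval_scaling=["rescaled", "fixed"],
    lengths=[5 / 30, 25 / 30],
)
# "rescaled": at global fraction 2/30 the warmup interval is 40% done, so the
# linear scheduler should return roughly 0.001 + 0.999 * 0.4 = 0.4006.
print(mult(2 / 30))
# "fixed": the cosine part is evaluated at the *global* fraction, so at 10/30
# it returns 0.5 * (1 + cos(pi/3)) = 0.75, the same value a pure cosine
# schedule over the whole run would have there.
print(mult(10 / 30), 0.5 * (1 + math.cos(math.pi * 10 / 30)))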
def before_train(self):
    self._optimizer = self._optimizer or self.trainer.optimizer
    self._scheduler = self._scheduler or self.trainer.scheduler
    if isinstance(self._scheduler, ParamScheduler):
        self._scheduler = LRMultiplier(
            self._optimizer,
            self._scheduler,
            self.trainer.max_iter,
            last_iter=self.trainer.iter - 1,
        )

    # NOTE: some heuristics on what LR to summarize
    # summarize the param group with most parameters
    largest_group = max(len(g["params"]) for g in self._optimizer.param_groups)

    if largest_group == 1:
        # If all groups have one parameter,
        # then find the most common initial LR, and use it for summary
        lr_count = Counter([g["lr"] for g in self._optimizer.param_groups])
        lr = lr_count.most_common()[0][0]
        for i, g in enumerate(self._optimizer.param_groups):
            if g["lr"] == lr:
                self._best_param_group_id = i
                break
    else:
        for i, g in enumerate(self._optimizer.param_groups):
            if len(g["params"]) == largest_group:
                self._best_param_group_id = i
                break
class LRScheduler(HookBase):
    """
    A hook which executes a torch builtin LR scheduler and summarizes the LR.
    It is executed after every iteration.
    """

    def __init__(self, optimizer=None, scheduler=None):
        """
        Args:
            optimizer (torch.optim.Optimizer):
            scheduler (torch.optim.LRScheduler or fvcore.common.param_scheduler.ParamScheduler):
                if a :class:`ParamScheduler` object, it defines the multiplier over
                the base LR in the optimizer.

        If any argument is not given, will try to obtain it from the trainer.
        """
        self._optimizer = optimizer
        self._scheduler = scheduler

    def before_train(self):
        self._optimizer = self._optimizer or self.trainer.optimizer
        self._scheduler = self._scheduler or self.trainer.scheduler
        if isinstance(self._scheduler, ParamScheduler):
            self._scheduler = LRMultiplier(
                self._optimizer,
                self._scheduler,
                self.trainer.max_iter,
                last_iter=self.trainer.iter - 1,
            )

        # NOTE: some heuristics on what LR to summarize
        # summarize the param group with most parameters
        largest_group = max(len(g["params"]) for g in self._optimizer.param_groups)

        if largest_group == 1:
            # If all groups have one parameter,
            # then find the most common initial LR, and use it for summary
            lr_count = Counter([g["lr"] for g in self._optimizer.param_groups])
            lr = lr_count.most_common()[0][0]
            for i, g in enumerate(self._optimizer.param_groups):
                if g["lr"] == lr:
                    self._best_param_group_id = i
                    break
        else:
            for i, g in enumerate(self._optimizer.param_groups):
                if len(g["params"]) == largest_group:
                    self._best_param_group_id = i
                    break

    def after_step(self):
        lr = self._optimizer.param_groups[self._best_param_group_id]["lr"]
        self.trainer.storage.put_scalar("lr", lr, smoothing_hint=False)
        self._scheduler.step()
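# A minimal usage sketch (assumptions: a detectron2-style trainer object named
# `trainer` that exposes .optimizer, .scheduler, .max_iter and a
# register_hooks() method, as the hook expects; `my_opt` and `my_multiplier`
# are placeholder names). The hook can pick up the optimizer and scheduler
# from the trainer, or take them explicitly:
trainer.register_hooks([LRScheduler()])
# trainer.register_hooks([LRScheduler(optimizer=my_opt, scheduler=my_multiplier)])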
def before_train(self):
    self._optimizer = self._optimizer or self.trainer.optimizer
    if isinstance(self.scheduler, ParamScheduler):
        self._scheduler = LRMultiplier(
            self._optimizer,
            self.scheduler,
            self.trainer.max_iter,
            last_iter=self.trainer.iter - 1,
        )
    self._best_param_group_id = LRScheduler.get_best_param_group_id(self._optimizer)
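# The before_train above calls LRScheduler.get_best_param_group_id(), whose
# body is not included in this snippet. A sketch of such a helper (an
# assumption: it would live on the LRScheduler class as a static method),
# following the same heuristic as the earlier before_train: prefer the param
# group with the most parameters; if every group has a single parameter, pick
# the first group using the most common initial LR.
from collections import Counter

@staticmethod
def get_best_param_group_id(optimizer):
    largest_group = max(len(g["params"]) for g in optimizer.param_groups)
    if largest_group == 1:
        lr_count = Counter([g["lr"] for g in optimizer.param_groups])
        lr = lr_count.most_common()[0][0]
        for i, g in enumerate(optimizer.param_groups):
            if g["lr"] == lr:
                return i
    else:
        for i, g in enumerate(optimizer.param_groups):
            if len(g["params"]) == largest_group:
                return i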
def test_warmup_cosine(self):
    p = nn.Parameter(torch.zeros(0))
    opt = torch.optim.SGD([p], lr=5)
    multiplier = WarmupParamScheduler(
        CosineParamScheduler(1, 0),
        0.001,
        5 / 30,
    )
    sched = LRMultiplier(opt, multiplier, 30)

    p.sum().backward()
    opt.step()
    self.assertEqual(opt.param_groups[0]["lr"], 0.005)
    lrs = [0.005]

    for _ in range(30):
        sched.step()
        lrs.append(opt.param_groups[0]["lr"])
    for idx, lr in enumerate(lrs):
        expected_cosine = 2.5 * (1.0 + math.cos(math.pi * idx / 30))
        if idx >= 5:
            self.assertAlmostEqual(lr, expected_cosine)
        else:
            self.assertNotAlmostEqual(lr, expected_cosine)
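# LRMultiplier follows the standard torch LR-scheduler interface, so it can be
# checkpointed and restored like other schedulers. A minimal sketch
# (assumptions: the detectron2.solver / fvcore import paths used below, and
# that state_dict()/load_state_dict() behave as in torch.optim.lr_scheduler;
# the fvcore multiplier itself is stateless):
import torch
from torch import nn
from fvcore.common.param_scheduler import CosineParamScheduler
from detectron2.solver import LRMultiplier, WarmupParamScheduler

p = nn.Parameter(torch.zeros(0))
opt = torch.optim.SGD([p], lr=5)
mult = WarmupParamScheduler(CosineParamScheduler(1, 0), 0.001, 5 / 30)
sched = LRMultiplier(opt, mult, 30)
for _ in range(10):
    sched.step()

state = sched.state_dict()  # pytorch-side state only (base LRs, last iteration)
sched2 = LRMultiplier(opt, mult, 30)
sched2.load_state_dict(state)  # subsequent .step() calls continue from where sched left off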