def test_cwrstar(self):
    # SIT scenario
    model, optimizer, criterion, my_nc_benchmark = self.init_sit()
    last_fc_name, _ = get_last_fc_layer(model)
    strategy = CWRStar(
        model,
        optimizer,
        criterion,
        last_fc_name,
        train_mb_size=64,
        device=self.device,
    )
    self.run_strategy(my_nc_benchmark, strategy)

    # MT scenario
    strategy = CWRStar(
        model,
        optimizer,
        criterion,
        last_fc_name,
        train_mb_size=64,
        device=self.device,
    )
    benchmark = self.load_benchmark(use_task_labels=True)
    self.run_strategy(benchmark, strategy)
def __init__(
    self,
    criterion=None,
    lr: float = 0.001,
    momentum=0.9,
    l2=0.0005,
    train_epochs: int = 4,
    init_update_rate: float = 0.01,
    inc_update_rate=0.00005,
    max_r_max=1.25,
    max_d_max=0.5,
    inc_step=4.1e-05,
    rm_sz: int = 1500,
    freeze_below_layer: str = "lat_features.19.bn.beta",
    latent_layer_num: int = 19,
    ewc_lambda: float = 0,
    train_mb_size: int = 128,
    eval_mb_size: int = 128,
    device=None,
    plugins: Optional[Sequence[SupervisedPlugin]] = None,
    evaluator: EvaluationPlugin = default_evaluator,
    eval_every=-1,
):
    """
    Creates an instance of the AR1 strategy.

    :param criterion: The loss criterion to use. Defaults to None, in which
        case the cross entropy loss is used.
    :param lr: The learning rate (SGD optimizer).
    :param momentum: The momentum (SGD optimizer).
    :param l2: The L2 penalty used for weight decay.
    :param train_epochs: The number of training epochs. Defaults to 4.
    :param init_update_rate: The initial update rate of BatchReNorm layers.
    :param inc_update_rate: The incremental update rate of BatchReNorm
        layers.
    :param max_r_max: The maximum r value of BatchReNorm layers.
    :param max_d_max: The maximum d value of BatchReNorm layers.
    :param inc_step: The incremental step of r and d values of BatchReNorm
        layers.
    :param rm_sz: The size of the replay buffer. The replay buffer is
        shared across classes. Defaults to 1500.
    :param freeze_below_layer: A string describing the name of the layer to
        use while freezing the lower (nearest to the input) part of the
        model. The given layer is not frozen (exclusive).
    :param latent_layer_num: The number of the layer to use as the Latent
        Replay Layer. Usually this is the same as `freeze_below_layer`.
    :param ewc_lambda: The Synaptic Intelligence lambda term. Defaults to
        0, which means that the Synaptic Intelligence regularization will
        not be applied.
    :param train_mb_size: The train minibatch size. Defaults to 128.
    :param eval_mb_size: The eval minibatch size. Defaults to 128.
    :param device: The device to use. Defaults to None (cpu).
    :param plugins: (optional) list of StrategyPlugins.
    :param evaluator: (optional) instance of EvaluationPlugin for logging
        and metric computations.
    :param eval_every: the frequency of the calls to `eval` inside the
        training loop. If -1: no evaluation during training. If 0: calls
        `eval` after the final epoch of each training experience. If >0:
        calls `eval` every `eval_every` epochs and at the end of all the
        epochs for a single experience.
    """
    warnings.warn(
        "The AR1 strategy implementation is in an alpha stage "
        "and is not perfectly aligned with the paper "
        "implementation. Please use at your own risk!"
    )

    if plugins is None:
        plugins = []

    # Model setup
    model = MobilenetV1(pretrained=True, latent_layer_num=latent_layer_num)
    replace_bn_with_brn(
        model,
        momentum=init_update_rate,
        r_d_max_inc_step=inc_step,
        max_r_max=max_r_max,
        max_d_max=max_d_max,
    )

    fc_name, fc_layer = get_last_fc_layer(model)

    if ewc_lambda != 0:
        # Synaptic Intelligence is not applied to the last fully
        # connected layer (and, implicitly, to the "freeze below" ones).
        plugins.append(
            SynapticIntelligencePlugin(
                ewc_lambda, excluded_parameters=[fc_name]
            )
        )

    self.cwr_plugin = CWRStarPlugin(
        model, cwr_layer_name=fc_name, freeze_remaining_model=False
    )
    plugins.append(self.cwr_plugin)

    optimizer = SGD(
        model.parameters(), lr=lr, momentum=momentum, weight_decay=l2
    )

    if criterion is None:
        criterion = CrossEntropyLoss()

    self.ewc_lambda = ewc_lambda
    self.freeze_below_layer = freeze_below_layer
    self.rm_sz = rm_sz
    self.inc_update_rate = inc_update_rate
    self.max_r_max = max_r_max
    self.max_d_max = max_d_max
    self.lr = lr
    self.momentum = momentum
    self.l2 = l2

    self.rm = None
    self.cur_acts: Optional[Tensor] = None
    self.replay_mb_size = 0

    super().__init__(
        model,
        optimizer,
        criterion,
        train_mb_size=train_mb_size,
        train_epochs=train_epochs,
        eval_mb_size=eval_mb_size,
        device=device,
        plugins=plugins,
        evaluator=evaluator,
        eval_every=eval_every,
    )
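    # Illustrative usage sketch (not part of the Avalanche source): AR1 builds
    # its own MobilenetV1 model and SGD optimizer, so a training loop only has
    # to supply an Avalanche benchmark. `SplitCIFAR10` is used here only as an
    # example benchmark, and module paths may differ across Avalanche versions.
    #
    #   from avalanche.benchmarks.classic import SplitCIFAR10
    #
    #   benchmark = SplitCIFAR10(n_experiences=5)
    #   strategy = AR1(train_epochs=4, rm_sz=1500, device="cuda")
    #   for experience in benchmark.train_stream:
    #       strategy.train(experience)
    #       strategy.eval(benchmark.test_stream)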