def build_optimizer(self, trn, epochs, lr, adam_epsilon, weight_decay, warmup_steps, transformer_lr, **kwargs):
    # noinspection PyProtectedMember
    transformer = self._get_transformer()
    if transformer:
        num_training_steps = len(trn) * epochs // self.config.get('gradient_accumulation', 1)
        optimizer, scheduler = build_optimizer_scheduler_with_transformer(self.model, transformer,
                                                                          lr, transformer_lr,
                                                                          num_training_steps, warmup_steps,
                                                                          weight_decay, adam_epsilon)
    else:
        optimizer = torch.optim.Adam(self.model.parameters(), self.config.lr)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer=optimizer,
            mode='max',
            factor=0.5,
            patience=2,
            verbose=True,
        )
    return optimizer, scheduler
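# The sketch below is an assumption about what a helper such as
# build_optimizer_scheduler_with_transformer typically does: give the transformer its own
# learning rate via parameter groups and attach a linear warmup schedule. The function name,
# the grouping, and the use of AdamW/get_linear_schedule_with_warmup are illustrative only,
# not the actual implementation.
import torch
from transformers import get_linear_schedule_with_warmup


def sketch_build_optimizer_scheduler_with_transformer(model, transformer, lr, transformer_lr,
                                                      num_training_steps, warmup_steps,
                                                      weight_decay, adam_epsilon):
    transformer_parameters = set(transformer.parameters())
    param_groups = [
        # Fine-tune the pretrained transformer with its own (usually smaller) learning rate.
        {'params': list(transformer_parameters), 'lr': transformer_lr},
        # Task-specific layers keep the main learning rate.
        {'params': [p for p in model.parameters() if p not in transformer_parameters], 'lr': lr},
    ]
    optimizer = torch.optim.AdamW(param_groups, lr=lr, eps=adam_epsilon, weight_decay=weight_decay)
    # warmup_steps may be given as a ratio of total steps (e.g. 0.1) or as an absolute count.
    if isinstance(warmup_steps, float):
        warmup_steps = int(num_training_steps * warmup_steps)
    scheduler = get_linear_schedule_with_warmup(optimizer, warmup_steps, num_training_steps)
    return optimizer, scheduler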
def build_optimizer(self, epochs, trn, gradient_accumulation, **kwargs):
    config = self.config
    model = self.model
    if isinstance(model, nn.DataParallel):
        model = model.module
    transformer = self._get_transformer_builder()
    if transformer and transformer.trainable:
        transformer = self._get_transformer()
        # Optimize everything except the transformer with plain Adam.
        optimizer = Adam(set(model.parameters()) - set(transformer.parameters()),
                         config.lr,
                         (config.mu, config.nu),
                         config.epsilon)
        if self.config.transformer_lr:
            num_training_steps = len(trn) * epochs // gradient_accumulation
            if not self.config.separate_optimizer:
                # One optimizer/scheduler pair covering both the transformer and the task layers.
                optimizer, scheduler = build_optimizer_scheduler_with_transformer(
                    model, transformer, config.lr, config.transformer_lr, num_training_steps,
                    config.warmup_steps, config.weight_decay, config.epsilon)
                transformer_optimizer, transformer_scheduler = None, None
            else:
                # A dedicated optimizer/scheduler pair for the transformer only.
                transformer_optimizer, transformer_scheduler = \
                    build_optimizer_scheduler_with_transformer(
                        transformer, transformer, config.lr, config.transformer_lr,
                        num_training_steps, config.warmup_steps, config.weight_decay,
                        config.epsilon)
        else:
            # No transformer learning rate given: freeze the transformer entirely.
            transformer.requires_grad_(False)
            transformer_optimizer, transformer_scheduler = None, None
    else:
        optimizer = Adam(model.parameters(),
                         config.lr,
                         (config.mu, config.nu),
                         config.epsilon)
        transformer_optimizer, transformer_scheduler = None, None
    if self.config.separate_optimizer:
        scheduler = ExponentialLR(optimizer, config.decay ** (1 / config.decay_steps))
    # noinspection PyUnboundLocalVariable
    # Hard-coded Adam and ExponentialLR hyperparameters; note that these two lines replace
    # the optimizer and scheduler built above (gamma = 0.75 ** (1 / 5000)).
    optimizer = Adam(model.parameters(), **{'lr': 0.002, 'betas': (0.9, 0.9), 'eps': 1e-12})
    scheduler = ExponentialLR(optimizer, **{'gamma': 0.9999424652406974})
    return optimizer, scheduler, transformer_optimizer, transformer_scheduler
def build_optimizer(self, epochs, trn, gradient_accumulation, **kwargs):
    config = self.config
    model = self.model
    if isinstance(model, nn.DataParallel):
        model = model.module
    if self.config.transformer:
        transformer = model.encoder.transformer
        optimizer = Adam(set(model.parameters()) - set(transformer.parameters()),
                         config.lr,
                         (config.mu, config.nu),
                         config.epsilon)
        if self.config.transformer_lr:
            num_training_steps = len(trn) * epochs // gradient_accumulation
            if self.config.separate_optimizer:
                transformer_optimizer, transformer_scheduler = \
                    build_optimizer_scheduler_with_transformer(
                        transformer, transformer, config.transformer_lr, config.transformer_lr,
                        num_training_steps, config.warmup_steps, config.weight_decay,
                        adam_epsilon=1e-8)
            else:
                optimizer, scheduler = build_optimizer_scheduler_with_transformer(
                    model, transformer, config.lr, config.transformer_lr, num_training_steps,
                    config.warmup_steps, config.weight_decay, adam_epsilon=1e-8)
                transformer_optimizer, transformer_scheduler = None, None
        else:
            transformer.requires_grad_(False)
            transformer_optimizer, transformer_scheduler = None, None
    else:
        optimizer = Adam(model.parameters(),
                         config.lr,
                         (config.mu, config.nu),
                         config.epsilon)
        transformer_optimizer, transformer_scheduler = None, None
    if self.config.separate_optimizer:
        scheduler = ExponentialLR(optimizer, config.decay ** (1 / config.decay_steps))
    # noinspection PyUnboundLocalVariable
    return optimizer, scheduler, transformer_optimizer, transformer_scheduler
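# The four-tuple returned above implies that, with separate_optimizer enabled, the training
# loop steps two optimizer/scheduler pairs. The sketch below is an assumed trainer step, not
# the actual training loop; compute_loss and the argument layout are hypothetical.
def sketch_training_step(model, batch, compute_loss, optimizer, scheduler,
                         transformer_optimizer=None, transformer_scheduler=None):
    loss = compute_loss(model, batch)
    loss.backward()
    # Step the task-layer optimizer and its per-step scheduler.
    optimizer.step()
    optimizer.zero_grad()
    if scheduler is not None:
        scheduler.step()
    # Step the transformer's own pair only when separate_optimizer produced one.
    if transformer_optimizer is not None:
        transformer_optimizer.step()
        transformer_optimizer.zero_grad()
    if transformer_scheduler is not None:
        transformer_scheduler.step()
    return loss.item()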
def build_optimizer(self, trn, epochs, gradient_accumulation=1, lr=1e-3, transformer_lr=5e-5,
                    adam_epsilon=1e-8, weight_decay=0.0, warmup_steps=0.1, **kwargs):
    num_training_steps = len(trn) * epochs // gradient_accumulation
    optimizer, scheduler = build_optimizer_scheduler_with_transformer(
        self.model, self.model.base_model, lr, transformer_lr, num_training_steps,
        warmup_steps, weight_decay, adam_epsilon)
    return optimizer, scheduler
def build_optimizer(self, trn, epochs, lr, adam_epsilon, weight_decay, warmup_steps,
                    transformer_lr=None, gradient_accumulation=1, **kwargs):
    num_training_steps = len(trn) * epochs // gradient_accumulation
    if transformer_lr is None:
        transformer_lr = lr
    transformer = find_transformer(self.model.embed)
    optimizer, scheduler = build_optimizer_scheduler_with_transformer(
        self.model, transformer, lr, transformer_lr, num_training_steps,
        warmup_steps, weight_decay, adam_epsilon)
    return optimizer, scheduler
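# find_transformer is only referenced above; the sketch below is an assumption about what
# such a lookup could do: walk the embedding module and return the first pretrained
# transformer it contains. The name sketch_find_transformer and the PreTrainedModel check
# are illustrative, not the actual helper.
from typing import Optional

import torch.nn as nn
from transformers import PreTrainedModel


def sketch_find_transformer(module: nn.Module) -> Optional[PreTrainedModel]:
    for submodule in module.modules():  # modules() yields `module` itself and all descendants
        if isinstance(submodule, PreTrainedModel):
            return submodule
    return None  # this model has no transformer embedding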
def build_optimizer(self, trn, epochs, lr, adam_epsilon, weight_decay, warmup_steps,
                    transformer_lr=None, teacher=None, **kwargs):
    num_training_steps = len(trn) * epochs // self.config.get('gradient_accumulation', 1)
    if transformer_lr is None:
        transformer_lr = lr
    transformer = self.model.encoder.transformer
    optimizer, scheduler = build_optimizer_scheduler_with_transformer(
        self.model, transformer, lr, transformer_lr, num_training_steps,
        warmup_steps, weight_decay, adam_epsilon)
    if teacher:
        lambda_scheduler = LinearTeacherAnnealingScheduler(num_training_steps)
        scheduler = (scheduler, lambda_scheduler)
    return optimizer, scheduler
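# When a teacher is given, the learning-rate scheduler is packed together with a teacher
# annealing scheduler. The class below is an assumed sketch of such a scheduler, based only
# on its name and the num_training_steps argument: the mixing weight on the teacher's
# predictions decays linearly from 1.0 to 0.0 over training. The interface is hypothetical.
class SketchLinearTeacherAnnealingScheduler:
    def __init__(self, num_training_steps: int):
        self.num_training_steps = num_training_steps
        self.current_step = 0

    def step(self) -> None:
        # Advance one training step, clamped at the end of training.
        self.current_step = min(self.current_step + 1, self.num_training_steps)

    @property
    def teacher_lambda(self) -> float:
        # 1.0 at the start of training, 0.0 once all steps are consumed.
        return 1.0 - self.current_step / self.num_training_steps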