Example No. 1
    def create_optimizer_and_scheduler(self, num_training_steps: int):
        """
        Edited to use fixed Adafactor.

        Set up the optimizer and the learning rate scheduler.

        We provide a reasonable default that works well. If you want to use something else, you can pass a tuple in the
        Trainer's init through :obj:`optimizers`, or subclass and override this method.
        """
        if self.optimizer is None:
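            # Bias and LayerNorm weights are excluded from weight decay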
            no_decay = ["bias", "LayerNorm.weight"]
            optimizer_grouped_parameters = [
                {
                    "params": [
                        p for n, p in self.model.named_parameters()
                        if not any(nd in n for nd in no_decay)
                    ],
                    "weight_decay":
                    self.args.weight_decay,
                },
                {
                    "params": [
                        p for n, p in self.model.named_parameters()
                        if any(nd in n for nd in no_decay)
                    ],
                    "weight_decay":
                    0.0,
                },
            ]
            optimizer_cls = FixedAdafactor if self.args.adafactor else AdamW
            if self.args.adafactor:
                optimizer_kwargs = {
                    "scale_parameter": False,
                    "relative_step": False
                }
            else:
                optimizer_kwargs = {
                    "betas": (self.args.adam_beta1, self.args.adam_beta2),
                    "eps": self.args.adam_epsilon,
                }
            optimizer_kwargs["lr"] = self.args.learning_rate
            if self.sharded_dpp:
                self.optimizer = OSS(
                    params=optimizer_grouped_parameters,
                    optim=optimizer_cls,
                    **optimizer_kwargs,
                )
            else:
                self.optimizer = optimizer_cls(optimizer_grouped_parameters,
                                               **optimizer_kwargs)

        if self.lr_scheduler is None:
            self.lr_scheduler = get_scheduler(
                self.args.lr_scheduler_type,
                self.optimizer,
                num_warmup_steps=self.args.warmup_steps,
                num_training_steps=num_training_steps,
            )
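The docstring above also mentions passing a ready-made (optimizer, scheduler) pair through the Trainer's optimizers argument instead of overriding this method. A minimal sketch of that route, assuming model, training_args, and train_dataset already exist and using a placeholder step count:

from torch.optim import AdamW
from transformers import Trainer, get_scheduler

optimizer = AdamW(model.parameters(), lr=training_args.learning_rate)
lr_scheduler = get_scheduler(
    "linear",
    optimizer,
    num_warmup_steps=0,
    num_training_steps=1000,  # placeholder; normally derived from the dataloader length
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    optimizers=(optimizer, lr_scheduler),  # skips the default setup shown above
)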
Example No. 2
from transformers import AdamW, SchedulerType, get_scheduler


def load_optimizer_and_scheduler(
    model,
    dataloader,
    epochs,
    learning_rate,
    adam_beta1,
    adam_beta2,
    adam_epsilon,
    lr_scheduler_type,
    warmup_steps,
):
    # Creating the optimizer and scheduler
    max_steps = len(dataloader) * epochs
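    # Bias and LayerNorm parameters go in a separate group (note both groups use weight_decay=0.0 here)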
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.0,
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.0,
        },
    ]

    optimizer = AdamW(optimizer_grouped_parameters,
                      betas=(adam_beta1, adam_beta2),
                      eps=adam_epsilon,
                      lr=learning_rate)
    lr_scheduler = get_scheduler(
        name=SchedulerType(lr_scheduler_type),
        optimizer=optimizer,
        num_warmup_steps=warmup_steps,
        num_training_steps=max_steps,
    )
    return optimizer, lr_scheduler
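A minimal usage sketch for the helper above; MyDataset and the hyperparameter values are placeholders chosen for illustration, not part of the original example:

from torch.utils.data import DataLoader
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
dataloader = DataLoader(MyDataset(), batch_size=16)  # MyDataset is a stand-in for your dataset

optimizer, lr_scheduler = load_optimizer_and_scheduler(
    model=model,
    dataloader=dataloader,
    epochs=3,
    learning_rate=5e-5,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    lr_scheduler_type="linear",
    warmup_steps=0,
)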
Example No. 3
def build_default_lr_scheduler(
    optimizer: torch.optim.Optimizer,
    scheduler_kwargs: hf_parse.LRSchedulerKwargs,
) -> Any:
    """
    This follows the corresponding function in the transformers Trainer to construct the lr_scheduler.

    Args:
        optimizer: optimizer to apply lr_scheduler to
        scheduler_kwargs: see LRSchedulerKwargs in _config_parser.py for expected fields.
    Returns:
        lr_scheduler configured accordingly
    """
    return hf_opt.get_scheduler(
        scheduler_kwargs.lr_scheduler_type,
        optimizer,
        num_warmup_steps=scheduler_kwargs.num_warmup_steps,
        num_training_steps=scheduler_kwargs.num_training_steps,
    )
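A usage sketch for the function above. LRSchedulerKwargs lives in the project's _config_parser.py; the stand-in dataclass below is an assumption that only mirrors the three fields this function reads, and model is taken as already defined:

from dataclasses import dataclass
import torch

@dataclass
class SchedulerKwargsStub:  # hypothetical stand-in for hf_parse.LRSchedulerKwargs
    lr_scheduler_type: str = "linear"
    num_warmup_steps: int = 0
    num_training_steps: int = 1000

optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
lr_scheduler = build_default_lr_scheduler(optimizer, SchedulerKwargsStub())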
Example No. 4
def main(args: Config):
    """"""
    accelerator = Accelerator()
    logger.info(f'accelerator state:\n{accelerator.state}')

    # Set random seed
    set_random_seed(args)

    # Prepare model
    logger.info('***** Model prepare *****')
    model, tokenizer = model_prepare(args)

    # Prepare data
    logger.info('***** Data prepare *****')
    train_dl, val_dl = data_prepare(args, tokenizer)

    # Prepare optimizer
    logger.info('***** Optimizer prepare *****')
    optimizer = optimizer_prepare(args, model)

    # accelerator prepare
    model, optimizer, train_dl, val_dl = accelerator.prepare(
        model, optimizer, train_dl, val_dl)

    # prepare lr_scheduler
    num_update_steps_per_epoch = math.ceil(
        len(train_dl) / args.gradient_accumulation_steps)
    args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch
    lr_scheduler = get_scheduler(
        name=args.lr_scheduler_type,
        optimizer=optimizer,
        num_warmup_steps=args.num_warmup_steps,
        num_training_steps=args.max_train_steps,
    )

    # train
    logger.info("***** Start training *****")
    train(args, model, optimizer, train_dl, val_dl, accelerator, lr_scheduler)

    # Save model
    logger.info("***** Model save *****")
    model_save(args, accelerator, model, tokenizer)
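A small worked example of the step arithmetic used above to size the scheduler (the numbers are illustrative, not from the original script):

import math

batches_per_epoch = 1000
gradient_accumulation_steps = 4
num_train_epochs = 3

num_update_steps_per_epoch = math.ceil(batches_per_epoch / gradient_accumulation_steps)  # 250 optimizer updates per epoch
max_train_steps = num_train_epochs * num_update_steps_per_epoch  # 750 scheduler steps in total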
Example No. 5
    def create_scheduler(self, num_training_steps: int):
        """
        Set up the scheduler. The optimizer of the trainer must have been set up before this method is called.

        Args:
            num_training_steps (int): The number of training steps to do.
        """
        if self.lr_scheduler is None:
            warmup_steps = (
                self.args.warmup_steps
                if self.args.warmup_steps > 0
                else math.ceil(num_training_steps * self.args.warmup_ratio)
            )

            self.lr_scheduler = get_scheduler(
                self.args.lr_scheduler_type,
                self.optimizer,
                num_warmup_steps=warmup_steps,
                num_training_steps=num_training_steps,
            )
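For illustration, here is how the warmup value above resolves with made-up numbers: an explicit warmup_steps wins, otherwise warmup_ratio is applied to the total step count.

import math

num_training_steps = 10_000
warmup_steps = 0       # args.warmup_steps left at its default
warmup_ratio = 0.06    # args.warmup_ratio

resolved = warmup_steps if warmup_steps > 0 else math.ceil(num_training_steps * warmup_ratio)
# resolved == 600, i.e. 6% of training is spent warming up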