import torch

from transformers import BartConfig, BartForConditionalGeneration


def convert_parlai_checkpoint(checkpoint_path, pytorch_dump_folder_path, config_json_path):
    """
    Copy/paste/tweak the ParlAI checkpoint's weights into our BART structure.
    """
    model = torch.load(checkpoint_path, map_location="cpu")
    sd = model["model"]
    cfg = BartConfig.from_json_file(config_json_path)
    m = BartForConditionalGeneration(cfg)
    valid_keys = m.model.state_dict().keys()
    failures = []
    mapping = {}
    for k, v in sd.items():
        if k in IGNORE_KEYS:
            continue

        new_k = rename_state_dict_key(k)
        if new_k not in valid_keys:
            failures.append([k, new_k])
        else:
            mapping[new_k] = v
    if cfg.normalize_before:  # Blenderbot-3B checkpoints. Rename layernorm_embedding -> layer_norm
        rename_layernorm_keys(sd)
    m.model.load_state_dict(mapping, strict=True)
    m.half()
    m.save_pretrained(pytorch_dump_folder_path)
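
A converter like this is usually driven from a small command-line entry point. The sketch below shows one plausible way to wire it up; the flag names (--src_path, --save_dir, --hf_config_json) are illustrative assumptions, not necessarily the original script's interface.

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # Flag names are assumptions for illustration only.
    parser.add_argument("--src_path", type=str, help="Path to the ParlAI .pt checkpoint")
    parser.add_argument("--save_dir", type=str, default="blenderbot_converted", help="Output directory for the converted model")
    parser.add_argument("--hf_config_json", type=str, help="Path to a BartConfig-compatible config.json")
    args = parser.parse_args()
    convert_parlai_checkpoint(args.src_path, args.save_dir, args.hf_config_json)
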
Example #2
def pre_init(self, hparams):
    self.output_dir = Path(hparams.output_dir)
    self.output_dir.mkdir(exist_ok=True)
    teacher = BartForConditionalGeneration.from_pretrained(hparams.teacher).eval()
    student_updates = {
        "decoder_layers": hparams.student_decoder_layers,
        "encoder_layers": hparams.student_encoder_layers,
    }
    if hparams.length_penalty != -1:
        student_updates["length_penalty"] = hparams.length_penalty
    # Decide which teacher layers each student stack should inherit
    d_layers_to_copy = get_layers_to_copy(student_updates["decoder_layers"], teacher.config.decoder_layers)
    e_layers_to_copy: List = get_layers_to_copy(student_updates["encoder_layers"], teacher.config.encoder_layers)
    hparams.d_layer_to_copy = d_layers_to_copy
    hparams.e_layer_to_copy = e_layers_to_copy
    # Start from the teacher's config and override only the student-specific fields
    kw = teacher.config.to_diff_dict()
    kw.update(student_updates)
    # Copy weights
    student_cfg = BartConfig(**kw)
    student = BartForConditionalGeneration(student_cfg)
    student, _ = init_student(student, teacher)
    save_dir = self.output_dir.joinpath("student")
    self.copy_to_student(d_layers_to_copy, e_layers_to_copy, hparams, student, teacher)
    student.save_pretrained(save_dir)
    hparams.model_name_or_path = str(save_dir)
    return student, student_cfg, teacher
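
pre_init defers the choice of which teacher layers survive in the student to get_layers_to_copy, whose body is not shown here. Below is a minimal sketch of such a helper assuming a simple evenly spaced selection; the name pick_layers_to_copy and the heuristic are assumptions, not the original implementation.

from typing import List


def pick_layers_to_copy(n_student: int, n_teacher: int) -> List[int]:
    # Spread the student's layers evenly across the teacher's depth,
    # always keeping the teacher's first layer.
    if n_student > n_teacher:
        raise ValueError(f"student has {n_student} layers but teacher only has {n_teacher}")
    if n_student == n_teacher:
        return list(range(n_teacher))
    step = n_teacher / n_student
    return [int(i * step) for i in range(n_student)]

For example, pick_layers_to_copy(3, 12) returns [0, 4, 8], so a 3-layer student would inherit the teacher's layers 0, 4 and 8.
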
Example #3
import logging
import math

import torch
from torch import nn
from torch.optim import Adam
from torch.utils.data import DataLoader
from transformers import BartForConditionalGeneration


def train(
    config: TrainConfig,  # project-specific training hyperparameters, defined elsewhere
    model: BartForConditionalGeneration,
    train_dataloader: DataLoader,
    dev_dataloader: DataLoader,
    optimizer: Adam,
    logger: logging.Logger,
    device: torch.device,
):
    """ 지정된 Epoch만큼 모델을 학습시키는 함수입니다. """
    model.to(device)
    global_step = 0
    for epoch in range(1, config.num_epochs + 1):
        model.train()
        loss_sum = 0.0
        for data in train_dataloader:
            global_step += 1
            data = _change_device(data, device)
            optimizer.zero_grad()
            output = model(
                input_ids=data[0],
                attention_mask=data[1],
                decoder_input_ids=data[2],
                labels=data[3],
                decoder_attention_mask=data[4],
                return_dict=True,
            )
            loss = output["loss"]
            loss.backward()
            loss_sum += loss.item()

            nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            if global_step % config.train_log_interval == 0:
                mean_loss = loss_sum / config.train_log_interval
                logger.info(
                    f"Epoch {epoch} Step {global_step} " f"Loss {mean_loss:.4f} Perplexity {math.exp(mean_loss):8.2f}"
                )
                loss_sum = 0.0
            if global_step % config.dev_log_interval == 0:
                _validate(model, dev_dataloader, logger, device)
            if global_step % config.save_interval == 0:
                model.save_pretrained(f"{config.save_model_file_prefix}_{global_step}")
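
The loop relies on two helpers defined elsewhere, _change_device and _validate. Their actual implementations are not shown; a minimal sketch consistent with how they are called above (and with the imports added earlier) might look like this:

def _change_device(data, device):
    # Move every tensor of the batch tuple onto the target device.
    return tuple(t.to(device) for t in data)


def _validate(model, dev_dataloader, logger, device):
    # Report mean dev loss and perplexity without updating weights.
    model.eval()
    loss_sum, num_batches = 0.0, 0
    with torch.no_grad():
        for data in dev_dataloader:
            data = _change_device(data, device)
            output = model(
                input_ids=data[0],
                attention_mask=data[1],
                decoder_input_ids=data[2],
                labels=data[3],
                decoder_attention_mask=data[4],
                return_dict=True,
            )
            loss_sum += output["loss"].item()
            num_batches += 1
    mean_loss = loss_sum / max(num_batches, 1)
    logger.info(f"[Dev] Loss {mean_loss:.4f} Perplexity {math.exp(mean_loss):8.2f}")
    model.train()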