Example #1
    def pre_init(self, hparams):
        # Build a student BART whose config mirrors the teacher's (minus the
        # reduced layer counts), seed it with teacher weights, then save it
        # so the trainer can reload it with from_pretrained.
        self.output_dir = Path(hparams.output_dir)
        self.output_dir.mkdir(exist_ok=True)
        teacher = BartForConditionalGeneration.from_pretrained(
            hparams.teacher).eval()
        student_updates = {
            "decoder_layers": hparams.student_decoder_layers,
            "encoder_layers": hparams.student_encoder_layers,
        }
        if hparams.length_penalty != -1:
            student_updates["length_penalty"] = hparams.length_penalty
        d_layers_to_copy = get_layers_to_copy(
            student_updates["decoder_layers"], teacher.config.decoder_layers)
        e_layers_to_copy: List = get_layers_to_copy(
            student_updates["encoder_layers"], teacher.config.encoder_layers)
        hparams.d_layer_to_copy = d_layers_to_copy
        hparams.e_layer_to_copy = e_layers_to_copy
        kw = teacher.config.to_diff_dict()
        kw.update(student_updates)
        # Copy weights
        student_cfg = BartConfig(**kw)
        student = BartForConditionalGeneration(student_cfg)
        student, _ = init_student(student, teacher)
        save_dir = self.output_dir.joinpath("student")
        self.copy_to_student(d_layers_to_copy, e_layers_to_copy, hparams,
                             student, teacher)
        student.save_pretrained(save_dir)
        hparams.model_name_or_path = str(save_dir)
        return student, student_cfg, teacher
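These snippets assume the usual imports (from pathlib import Path, from typing import List, plus the relevant transformers classes) and three helpers defined elsewhere: get_layers_to_copy, init_student and copy_to_student. A minimal sketch of what get_layers_to_copy plausibly computes, assuming an even-spacing heuristic that always keeps the last teacher layer; the real helper may use a hand-tuned lookup table instead, and the _sketch name is mine:

from typing import List

def get_layers_to_copy_sketch(n_student: int, n_teacher: int) -> List[int]:
    # Spread n_student picks evenly across the teacher's layers,
    # always ending on the last teacher layer.
    step = n_teacher / n_student
    return [round(step * (i + 1)) - 1 for i in range(n_student)]

get_layers_to_copy_sketch(3, 12)   # [3, 7, 11]
get_layers_to_copy_sketch(12, 12)  # [0, 1, ..., 11]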
Example #2
    def pre_init(self, hparams):
        raise NotImplementedError("T5 Distillation does not work yet")
        # NOTE: everything below the raise is unreachable; it is kept as a
        # reference implementation for when T5 distillation is fixed.
        self.output_dir = Path(hparams.output_dir)
        self.output_dir.mkdir(exist_ok=True)
        teacher = T5ForConditionalGeneration.from_pretrained(hparams.teacher)
        n_layer = hparams.student_decoder_layers
        assert n_layer == hparams.student_encoder_layers  # TODO(SS): relax this constraint so that we can do 12-6.
        d_layers_to_copy = get_layers_to_copy(n_layer,
                                              len(teacher.decoder.block))
        e_layers_to_copy: List = get_layers_to_copy(n_layer,
                                                    len(teacher.encoder.block))
        student_updates = {"num_layers": n_layer}
        hparams.d_layer_to_copy = d_layers_to_copy
        hparams.e_layer_to_copy = e_layers_to_copy
        kw = teacher.config.to_diff_dict()
        kw.update(student_updates)
        # Copy weights
        student_cfg = T5Config(**kw)
        student = T5ForConditionalGeneration(student_cfg)
        student, _ = init_student(student, teacher)
        self.copy_to_student(d_layers_to_copy, e_layers_to_copy, hparams,
                             student, teacher)
        task_specific_params = student.config.task_specific_params
        if task_specific_params is not None:
            student.config.update(task_specific_params.get(
                "summarization", {}))  # TODO: dont hardcode
        save_dir = self.output_dir.joinpath("student")
        save_dir.mkdir(exist_ok=True)
        student.save_pretrained(save_dir)
        hparams.model_name_or_path = str(save_dir)
        return student, student_cfg, teacher
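For the T5 variants the encoder and decoder stacks are nn.ModuleList objects (teacher.encoder.block, teacher.decoder.block), so the per-layer copy that copy_to_student delegates to can be done wholesale via state_dict. A sketch under that assumption (copy_layers_sketch is a hypothetical name, not the repo's helper):

import torch.nn as nn

def copy_layers_sketch(src_layers: nn.ModuleList, dest_layers: nn.ModuleList,
                       layers_to_copy: list) -> None:
    # Collect the chosen teacher blocks, then load their weights into the
    # student's shorter block list position by position; ModuleList keys
    # are positional ("0.", "1.", ...), so the two state_dicts line up.
    selected = nn.ModuleList([src_layers[i] for i in layers_to_copy])
    assert len(dest_layers) == len(selected), f"{len(dest_layers)} != {len(selected)}"
    dest_layers.load_state_dict(selected.state_dict())

# e.g. copy_layers_sketch(teacher.decoder.block, student.decoder.block, d_layers_to_copy)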
Example #3
    def pre_init(self, hparams):
        self.output_dir = Path(hparams.output_dir)
        self.output_dir.mkdir(exist_ok=True)
        teacher = AutoModelForSeq2SeqLM.from_pretrained(hparams.teacher).eval()
        student_updates = {
            "decoder_layers": hparams.student_decoder_layers,
            "encoder_layers": hparams.student_encoder_layers,
        }
        if hparams.length_penalty != -1:
            student_updates["length_penalty"] = hparams.length_penalty
        e_layers_to_copy: List = get_layers_to_copy(student_updates["encoder_layers"], teacher.config.encoder_layers)
        hparams.e_layer_to_copy = e_layers_to_copy

        d_layers_to_copy: List = get_layers_to_copy(student_updates["decoder_layers"], teacher.config.decoder_layers)

        # d_matches picks which teacher decoder layers supervise the student's
        # hidden states; without --supervise_forward it just reuses the copy map.
        if hparams.supervise_forward:
            hparams.d_matches = get_layers_to_supervise(
                student_updates["decoder_layers"], teacher.config.decoder_layers
            )
        else:
            hparams.d_matches = d_layers_to_copy
        hparams.d_layer_to_copy = d_layers_to_copy

        kw = teacher.config.to_diff_dict()
        kw.update(student_updates)
        # Copy weights
        student_cfg = teacher.config_class(**kw)
        student = type(teacher)(student_cfg)
        student, _ = init_student(student, teacher)
        save_dir = self.output_dir.joinpath("student")
        self.copy_to_student(d_layers_to_copy, e_layers_to_copy, hparams, student, teacher)
        student.save_pretrained(save_dir)
        hparams.model_name_or_path = str(save_dir)
        return student, student_cfg, teacher
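init_student is also external to these snippets: it seeds the student with every teacher weight whose name and shape match, so copy_to_student afterwards only has to remap the selected layers. A plausible minimal version (an assumption, not the verbatim helper):

def init_student_sketch(student, teacher):
    # strict=False lets the teacher-only keys (the layers the student
    # dropped) pass through silently; the student itself must end up
    # with no uninitialized parameters.
    info = student.load_state_dict(teacher.state_dict(), strict=False)
    assert info.missing_keys == [], info.missing_keys
    return student, info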
Example #4
    def pre_init(self, hparams):
        # This variant neither saves the student nor sets
        # hparams.model_name_or_path; it returns the layer copy map instead.
        teacher = T5ForConditionalGeneration.from_pretrained(hparams.teacher)
        n_layer = hparams.student_decoder_layers
        assert n_layer == hparams.student_encoder_layers  # TODO(SS): relax this
        d_layers_to_copy = get_layers_to_copy(n_layer,
                                              len(teacher.decoder.block))
        e_layers_to_copy: List = get_layers_to_copy(n_layer,
                                                    len(teacher.encoder.block))
        student_updates = {"num_layers": n_layer}
        hparams.d_layer_to_copy = d_layers_to_copy
        hparams.e_layer_to_copy = e_layers_to_copy
        kw = teacher.config.to_diff_dict()
        kw.update(student_updates)
        # Copy weights
        student_cfg = T5Config(**kw)
        student = T5ForConditionalGeneration(student_cfg)
        student, _ = init_student(student, teacher)
        self.copy_to_student(d_layers_to_copy, e_layers_to_copy, hparams,
                             student, teacher)
        Path(hparams.output_dir).mkdir(exist_ok=True)
        task_specific_params = student.config.task_specific_params
        if task_specific_params is not None:
            student.config.update(task_specific_params.get(
                "summarization", {}))
        return d_layers_to_copy, student, student_cfg, teacher
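The task_specific_params hoist at the end promotes nested per-task generation settings to top-level config keys, so that generate() later picks them up by default. Illustrative shape only; the values below are assumptions rather than fields read from a real checkpoint:

# Hypothetical nested config, shaped like T5's task_specific_params:
task_specific_params = {
    "summarization": {"num_beams": 4, "max_length": 200, "prefix": "summarize: "},
}
student.config.update(task_specific_params.get("summarization", {}))
# student.config.num_beams is now 4, student.config.prefix "summarize: ", etc.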
Example #5
    def pre_init(self, hparams):
        # Dump empty student model at a path, then call from_pretrained on it
        teacher = BartForConditionalGeneration.from_pretrained(
            hparams.teacher).eval()
        student_updates = {
            "decoder_layers": hparams.student_decoder_layers,
            "encoder_layers": hparams.student_encoder_layers,
        }
        d_layers_to_copy = get_layers_to_copy(
            student_updates["decoder_layers"], teacher.config.decoder_layers)
        e_layers_to_copy: List = get_layers_to_copy(
            student_updates["encoder_layers"], teacher.config.encoder_layers)
        hparams.d_layer_to_copy = d_layers_to_copy
        hparams.e_layer_to_copy = e_layers_to_copy
        kw = teacher.config.to_diff_dict()
        kw.update(student_updates)
        # Copy weights
        student_cfg = BartConfig(**kw)
        student = BartForConditionalGeneration(student_cfg)
        student, _ = init_student(student, teacher)
        self.copy_to_student(d_layers_to_copy, e_layers_to_copy, hparams,
                             student, teacher)
        Path(hparams.output_dir).mkdir(exist_ok=True)
        return d_layers_to_copy, student, student_cfg, teacher
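To exercise any of these variants, hparams only needs attribute access, so an argparse.Namespace is enough; each variant reads just the fields it uses. A hypothetical call against this last variant (the checkpoint name, layer counts and the distiller instance are illustrative):

from argparse import Namespace

hparams = Namespace(
    teacher="facebook/bart-large-cnn",  # illustrative teacher checkpoint
    student_encoder_layers=12,
    student_decoder_layers=6,
    output_dir="distilbart_student",
)
# `distiller` stands in for an instance of the class defining pre_init:
d_layers_to_copy, student, student_cfg, teacher = distiller.pre_init(hparams)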