Example #1
    def __init__(
        self,
        model_name: str,
        *,
        max_length: int = None,
        sub_module: str = None,
        train_parameters: bool = True,
        last_layer_only: bool = True,
        override_weights_file: Optional[str] = None,
        override_weights_strip_prefix: Optional[str] = None,
        masked_language_modeling: bool = True,
    ) -> None:
        TokenEmbedder.__init__(self)  # Call the base class constructor
        tokenizer = PretrainedTransformerTokenizer(model_name)
        self.masked_language_modeling = masked_language_modeling

        if self.masked_language_modeling:
            self.config = AutoConfig.from_pretrained(model_name,
                                                     output_hidden_states=True)
            # We only need access to the HF tokenizer if we are doing masked language modeling
            self.tokenizer = tokenizer.tokenizer
            # The only differences when masked language modeling is enabled are:
            # 1) `output_hidden_states` must be True to get access to token embeddings.
            # 2) We need to use `AutoModelForMaskedLM` to get the correct model
            self.transformer_model = AutoModelForMaskedLM.from_pretrained(
                model_name, config=self.config)
        # Everything after the if statement (including the else) is copied directly from:
        # https://github.com/allenai/allennlp/blob/master/allennlp/modules/token_embedders/pretrained_transformer_embedder.py
        else:
            from allennlp.common import cached_transformers

            self.transformer_model = cached_transformers.get(
                model_name, True, override_weights_file,
                override_weights_strip_prefix)
            self.config = self.transformer_model.config

        if sub_module:
            assert hasattr(self.transformer_model, sub_module)
            self.transformer_model = getattr(self.transformer_model,
                                             sub_module)
        self._max_length = max_length

        # I'm not sure if this works for all models; open an issue on github if you find a case
        # where it doesn't work.
        self.output_dim = self.config.hidden_size

        self._scalar_mix: Optional[ScalarMix] = None
        if not last_layer_only:
            self._scalar_mix = ScalarMix(self.config.num_hidden_layers)
            self.config.output_hidden_states = True

        self._num_added_start_tokens = len(
            tokenizer.single_sequence_start_tokens)
        self._num_added_end_tokens = len(tokenizer.single_sequence_end_tokens)
        self._num_added_tokens = self._num_added_start_tokens + self._num_added_end_tokens

        if not train_parameters:
            for param in self.transformer_model.parameters():
                param.requires_grad = False
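
A minimal usage sketch for the constructor above, assuming the surrounding class is an AllenNLP `TokenEmbedder` subclass named `PretrainedTransformerEmbedderMLM` (the actual class name is not shown in this excerpt):

embedder = PretrainedTransformerEmbedderMLM(
    "bert-base-uncased",
    max_length=512,
    masked_language_modeling=True,
)
# output_dim mirrors the transformer's hidden size, e.g. 768 for bert-base-uncased.
print(embedder.output_dim)
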
    def __init__(
        self,
        model_name: str,
        *,
        max_length: int = None,
        sub_module: str = None,
        train_parameters: bool = True,
        last_layer_only: bool = True,
        override_weights_file: Optional[str] = None,
        override_weights_strip_prefix: Optional[str] = None,
        gradient_checkpointing: Optional[bool] = None,
        tokenizer_kwargs: Optional[Dict[str, Any]] = None,
        transformer_kwargs: Optional[Dict[str, Any]] = None,
        masked_language_modeling: bool = True,
        load_directory: Optional[str] = None
    ) -> None:
        TokenEmbedder.__init__(self)  # Call the base class constructor
        tokenizer = PretrainedTransformerTokenizer(model_name, tokenizer_kwargs=tokenizer_kwargs)
        self.masked_language_modeling = masked_language_modeling

        if self.masked_language_modeling:
            self.config = AutoConfig.from_pretrained(model_name, output_hidden_states=True)
            # We only need access to the HF tokenizer if we are doing masked language modeling
            self.tokenizer = tokenizer.tokenizer
            # The only differences when masked language modeling is enabled are:
            # 1) `output_hidden_states` must be True to get access to token embeddings.
            # 2) We need to use `AutoModelForMaskedLM` to get the correct model
            # self.transformer_model = RobertaForAugment.from_pretrained()
            self.transformer_model = AutoModelForMaskedLM.from_pretrained(
                model_name, config=self.config, **(transformer_kwargs or {})
            )

            if load_directory is not None:
                print("Loading Model from:", load_directory)
                # Load a saved state dict, keep only the keys that exist in the current
                # model, and merge them in non-strictly so any remaining mismatches are ignored.
                state = torch.load(load_directory)
                model_dict = self.transformer_model.state_dict()
                state = {k: v for k, v in state.items() if k in model_dict}
                model_dict.update(state)
                self.transformer_model.load_state_dict(model_dict, strict=False)
                print("Loading Model from:", load_directory, "...Finished.")
        # Everything after the if statement (including the else) is copied directly from:
        # https://github.com/allenai/allennlp/blob/master/allennlp/modules/token_embedders/pretrained_transformer_embedder.py
        else:
            from allennlp.common import cached_transformers

            self.transformer_model = cached_transformers.get(
                model_name, True, override_weights_file, override_weights_strip_prefix
            )
            self.config = self.transformer_model.config

        if gradient_checkpointing is not None:
            self.transformer_model.config.update({"gradient_checkpointing": gradient_checkpointing})

        if sub_module:
            assert hasattr(self.transformer_model, sub_module)
            self.transformer_model = getattr(self.transformer_model, sub_module)

        # print("max_length", max_length)
        self._max_length = max_length

        # I'm not sure if this works for all models; open an issue on github if you find a case
        # where it doesn't work.
        self.output_dim = self.config.hidden_size

        self._scalar_mix: Optional[ScalarMix] = None
        if not last_layer_only:
            self._scalar_mix = ScalarMix(self.config.num_hidden_layers)
            self.config.output_hidden_states = True

        self._num_added_start_tokens = len(tokenizer.single_sequence_start_tokens)
        self._num_added_end_tokens = len(tokenizer.single_sequence_end_tokens)
        self._num_added_tokens = self._num_added_start_tokens + self._num_added_end_tokens

        # Additional BERT-style sub-modules built from the same config. These assume a
        # BERT-compatible config and require e.g.:
        #   from transformers.models.bert.modeling_bert import BertEmbeddings, BertEncoder, BertLayer
        self.encoder = BertEncoder(self.config)
        self.layer = torch.nn.ModuleList(
            [BertLayer(self.config) for _ in range(self.config.num_hidden_layers)]
        )
        self.embeddings = BertEmbeddings(self.config)
        self.output_hidden_states = self.config.output_hidden_states

        if not train_parameters:
            for param in self.transformer_model.parameters():
                param.requires_grad = False
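
A similar sketch for the extended constructor, using the same hypothetical class name; the checkpoint path and keyword values below are illustrative only:

embedder = PretrainedTransformerEmbedderMLM(
    "roberta-base",
    max_length=512,
    train_parameters=False,               # freeze all transformer weights
    gradient_checkpointing=True,          # written into the HF config by the constructor
    tokenizer_kwargs={"use_fast": True},
    load_directory="checkpoints/mlm_weights.pt",  # hypothetical path to a torch-saved state dict
)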