Example #1
File: lm_utils.py  Project: manneh/NeMo
def get_lm_model(
    pretrained_model_name: str,
    config_dict: Optional[dict] = None,
    config_file: Optional[str] = None,
    checkpoint_file: Optional[str] = None,
    vocab_file: Optional[str] = None,
) -> BertModule:
    """
    Helper function to instantiate a language model encoder, either from scratch or a pretrained model.
    If only pretrained_model_name is passed, a pretrained model is returned.
    If a configuration is passed, whether as a file or dictionary, the model is initialized with random weights.

    Args:
        pretrained_model_name: pretrained model name, for example, bert-base-uncased or megatron-bert-cased.
            See get_pretrained_lm_models_list() for full list.
        config_dict: model configuration dictionary
        config_file: path to the model configuration file
        checkpoint_file: path to the pretrained model checkpoint
        vocab_file: path to vocab_file to be used with Megatron-LM

    Returns:
        Pretrained BertModule
    """

    # check valid model type
    if not pretrained_model_name or pretrained_model_name not in get_pretrained_lm_models_list(
            include_external=False):
        logging.warning(
            f'{pretrained_model_name} is not in get_pretrained_lm_models_list(include_external=False), '
            f'will be using AutoModel from HuggingFace.')

    # warning when user passes both configuration dict and file
    if config_dict and config_file:
        logging.warning(
            f"Both config_dict and config_file were found, defaulting to use config_file: {config_file} will be used."
        )

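    # megatron model names are built with Megatron-LM utilities, all other names fall back to HuggingFace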
    if "megatron" in pretrained_model_name:
        model, checkpoint_file = get_megatron_lm_model(
            config_dict=config_dict,
            config_file=config_file,
            pretrained_model_name=pretrained_model_name,
            checkpoint_file=checkpoint_file,
            vocab_file=vocab_file,
        )
    else:
        model = get_huggingface_lm_model(
            config_dict=config_dict,
            config_file=config_file,
            pretrained_model_name=pretrained_model_name,
        )

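    # optionally restore weights from a local checkpoint (passed in or returned by the Megatron path)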
    if checkpoint_file:
        app_state = AppState()
        if not app_state.is_model_being_restored and not os.path.exists(
                checkpoint_file):
            raise ValueError(f'{checkpoint_file} not found')
        model.restore_weights(restore_path=checkpoint_file)

    return model
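
A minimal usage sketch for the helper above (not part of the original listing); the import path is assumed from the file name lm_utils.py in NeMo's NLP collection and may differ between NeMo versions.

# Hedged sketch: the import path below is an assumption based on lm_utils.py.
from nemo.collections.nlp.modules.common.lm_utils import get_lm_model

# With only a pretrained model name (and no config_dict/config_file),
# a pretrained HuggingFace-backed BertModule is returned.
encoder = get_lm_model(pretrained_model_name='bert-base-uncased')
print(type(encoder).__name__)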
Example #2
    def __init__(
        self,
        model_name: Optional[str] = None,
        pretrained: bool = True,
        config_dict: Optional[dict] = None,
        checkpoint_file: Optional[str] = None,
        vocab_file: Optional[str] = None,
    ):
        """Gets Megatron BERT based model to be used as an Encoder in NeMo NLP.
        Use the model_name arg to get a named model architecture. 
        Available model names can be found with get_megatron_lm_models_list(). 
        Use the pretrained arg to get the named model architecture with or without pretrained weights.

        Use config_dict to pass in arguments needed for Megatron-LM.
        For example, to instantiate a Megatron BERT large model we would do:
            config_dict={
                'hidden_size': 1024,
                'num_attention_heads': 16,
                'num_layers': 24,
                'max_position_embeddings': 512,
            }

        Args:
            model_name (Optional[str]): Named Megatron model architecture from NeMo. Defaults to None.
            pretrained (bool): Use True to get pretrained weights. 
                                        False will use the same architecture but with randomly initialized weights.
                                        Not implemented yet for Megatron encoders.
                                        Defaults to True.
            config_dict (Optional[dict], optional): Use for configuration of the Megatron model. Defaults to None.
            checkpoint_file (Optional[str], optional): Provide weights for the transformer from a local checkpoint.
                                                       If using model parallel then this should be a directory. Defaults to None.
            vocab_file (Optional[str], optional): Path to vocab file that was used when pretraining the Megatron model.
        """
        super().__init__()

        if not pretrained:
            raise ValueError(
                'We currently only support pretrained Megatron models. Please set pretrained=True'
            )

        if not checkpoint_file and not model_name:
            raise ValueError(
                'Currently Megatron models must be loaded from a pretrained model name or a pretrained checkpoint.'
            )

        if model_name or checkpoint_file:
            model, checkpoint_file = get_megatron_lm_model(
                pretrained_model_name=model_name,
                config_dict=config_dict,
                checkpoint_file=checkpoint_file,
                vocab_file=vocab_file,
            )

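        # cache the checkpoint path and the wrapped model's sizes, and use the model as the encoder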
        self._checkpoint_file = checkpoint_file
        self._hidden_size = model.hidden_size
        self._vocab_size = model.vocab_size

        self._encoder = model
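
For context, a hedged construction sketch for the encoder wrapper whose __init__ is shown above. The class name MegatronEncoderWrapper and the model name are stand-ins (the listing only shows the constructor); check get_megatron_lm_models_list() for real names, and note that pretrained Megatron weights must be available.

# Hedged sketch only: MegatronEncoderWrapper is a hypothetical name for the
# class whose __init__ is listed above; the model name is an assumption.
encoder = MegatronEncoderWrapper(
    model_name='megatron-bert-345m-uncased',  # assumed named architecture
    pretrained=True,                          # only pretrained weights are supported
)
# the wrapper caches the hidden size and vocab size of the underlying Megatron model
print(encoder._hidden_size, encoder._vocab_size)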