def get_lm_model(
    pretrained_model_name: str,
    config_dict: Optional[dict] = None,
    config_file: Optional[str] = None,
    checkpoint_file: Optional[str] = None,
    vocab_file: Optional[str] = None,
) -> BertModule:
    """
    Helper function to instantiate a language model encoder, either from scratch or from a pretrained model.
    If only pretrained_model_name is passed, a pretrained model is returned.
    If a configuration is passed, whether as a file or a dictionary, the model is initialized with random weights.

    Args:
        pretrained_model_name: pretrained model name, for example, bert-base-uncased or megatron-bert-cased.
            See get_pretrained_lm_models_list() for the full list.
        config_dict: the model configuration as a dictionary
        config_file: path to the model configuration file
        checkpoint_file: path to the pretrained model checkpoint
        vocab_file: path to the vocab file to be used with Megatron-LM

    Returns:
        Pretrained BertModule
    """

    # check valid model type
    if not pretrained_model_name or pretrained_model_name not in get_pretrained_lm_models_list(include_external=False):
        logging.warning(
            f'{pretrained_model_name} is not in get_pretrained_lm_models_list(include_external=False), '
            f'will be using AutoModel from HuggingFace.'
        )

    # warn when the user passes both a configuration dict and a configuration file
    if config_dict and config_file:
        logging.warning(f"Both config_dict and config_file were provided; defaulting to config_file: {config_file}.")

    if "megatron" in pretrained_model_name:
        model, checkpoint_file = get_megatron_lm_model(
            config_dict=config_dict,
            config_file=config_file,
            pretrained_model_name=pretrained_model_name,
            checkpoint_file=checkpoint_file,
            vocab_file=vocab_file,
        )
    else:
        model = get_huggingface_lm_model(
            config_dict=config_dict,
            config_file=config_file,
            pretrained_model_name=pretrained_model_name,
        )

    if checkpoint_file:
        app_state = AppState()
        if not app_state.is_model_being_restored and not os.path.exists(checkpoint_file):
            raise ValueError(f'{checkpoint_file} not found')
        model.restore_weights(restore_path=checkpoint_file)

    return model
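# Usage sketch (hedged): the model name and configuration values below are
# illustrative assumptions, not required settings; check
# get_pretrained_lm_models_list() for names that resolve without the
# HuggingFace AutoModel fallback.
#
#     # Pretrained weights:
#     encoder = get_lm_model(pretrained_model_name='bert-base-uncased')
#
#     # Random weights from an explicit configuration dictionary:
#     encoder = get_lm_model(
#         pretrained_model_name='bert-base-uncased',
#         config_dict={'hidden_size': 768, 'num_attention_heads': 12, 'num_hidden_layers': 12},
#     )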
def __init__(
    self,
    model_name: Optional[str] = None,
    pretrained: bool = True,
    config_dict: Optional[dict] = None,
    checkpoint_file: Optional[str] = None,
    vocab_file: Optional[str] = None,
):
    """Gets a Megatron BERT based model to be used as an encoder in NeMo NLP.
    Use the model_name arg to get a named model architecture.
    Available model names can be found with get_megatron_lm_models_list().
    Use the pretrained arg to get the named model architecture with or without pretrained weights.

    Use config_dict to pass in arguments needed for Megatron-LM.
    For example, to instantiate a Megatron BERT large model we would do:

        config_dict={
            'hidden_size': 1024,
            'num_attention_heads': 16,
            'num_layers': 24,
            'max_position_embeddings': 512,
        }

    Args:
        model_name (Optional[str]): Named Megatron model architecture from NeMo. Defaults to None.
        pretrained (bool): Use True to get pretrained weights.
            False will use the same architecture but with randomly initialized weights.
            Not implemented yet for Megatron encoders. Defaults to True.
        config_dict (Optional[dict], optional): Used to configure the Megatron model. Defaults to None.
        checkpoint_file (Optional[str], optional): Provides weights for the transformer from a local checkpoint.
            If using model parallelism, this should be a directory. Defaults to None.
        vocab_file (Optional[str], optional): Path to the vocab file that was used when pretraining the Megatron model.
    """
    super().__init__()

    if not pretrained:
        raise ValueError('We currently only support pretrained Megatron models. Please set pretrained=True.')

    if not checkpoint_file and not model_name:
        raise ValueError(
            'Currently Megatron models must be loaded from a pretrained model name or a pretrained checkpoint.'
        )

    # at this point at least one of model_name and checkpoint_file is guaranteed to be set
    model, checkpoint_file = get_megatron_lm_model(
        pretrained_model_name=model_name,
        config_dict=config_dict,
        checkpoint_file=checkpoint_file,
        vocab_file=vocab_file,
    )

    self._checkpoint_file = checkpoint_file
    self._hidden_size = model.hidden_size
    self._vocab_size = model.vocab_size
    self._encoder = model
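# Usage sketch (hedged): this __init__ is shown without its enclosing class, so
# the class name MegatronBertEncoder and the model name
# 'megatron-bert-345m-uncased' are both assumptions for illustration; check
# get_megatron_lm_models_list() for the names actually available.
#
#     encoder = MegatronBertEncoder(
#         model_name='megatron-bert-345m-uncased',
#         pretrained=True,
#         config_dict={
#             'hidden_size': 1024,
#             'num_attention_heads': 16,
#             'num_layers': 24,
#             'max_position_embeddings': 512,
#         },
#     )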