def _load(cls, config: Params, serialization_dir: str, weights_file: str = None, cuda_device: int = -1) -> "Model":
    """
    Instantiate an already-trained model from an experiment's serialization
    directory, honoring optional overrides for the weights file and device.
    """
    # Fall back to the default checkpoint inside the serialization directory.
    weights_file = weights_file or os.path.join(serialization_dir, _DEFAULT_WEIGHTS)

    # Restore the vocabulary; the config may name a Vocabulary subclass.
    vocabulary_dir = os.path.join(serialization_dir, "vocabulary")
    vocabulary_config = config.get("vocabulary", Params({}))
    chosen_type = vocabulary_config.pop_choice("type", Vocabulary.list_available(), True)
    vocab = Vocabulary.by_name(chosen_type).from_files(
        vocabulary_dir,
        vocabulary_config.get("padding_token", None),
        vocabulary_config.get("oov_token", None),
    )

    model_config = config.get("model")
    # Pretrained-embedding files were only needed at training time; the
    # trained values already live in the checkpoint, so stop the model
    # constructor from trying to re-read them from disk.
    remove_pretrained_embedding_params(model_config)
    model = Model.from_params(vocab=vocab, params=model_config)

    # If the vocabulary was extended after training, the freshly constructed
    # embedder can be smaller than the one stored in the checkpoint. Extend
    # it first so load_state_dict sees matching shapes; when vocab and model
    # embeddings are already in sync this is a no-op.
    model.extend_embedder_vocab()

    checkpoint = torch.load(weights_file, map_location=util.device_mapping(cuda_device))
    model.load_state_dict(checkpoint)

    # Move the whole model (embeddings included) onto the requested device so
    # every parameter lives in the same place as the loaded weights.
    if cuda_device >= 0:
        model.cuda(cuda_device)
    else:
        model.cpu()
    return model
def _load(cls, config: Params, serialization_dir: str, weights_file: str = None, cuda_device: int = -1) -> 'Model':
    """
    Rebuild a trained model from its serialization directory, then load the
    saved weights and move the result onto the requested device.
    """
    weights_file = weights_file or os.path.join(serialization_dir, _DEFAULT_WEIGHTS)

    # Recreate the vocabulary; the config may request a Vocabulary subclass.
    vocab_params = config.get("vocabulary", Params({}))
    vocab_type = vocab_params.pop_choice("type", Vocabulary.list_available(), True)
    vocab = Vocabulary.by_name(vocab_type).from_files(os.path.join(serialization_dir, 'vocabulary'))

    # Pretrained embedding files were consumed during training; their values
    # are inside the checkpoint, so drop the file references before the
    # model is constructed to avoid looking for files that may be gone.
    model_params = config.get('model')
    remove_pretrained_embedding_params(model_params)
    model = Model.from_params(vocab=vocab, params=model_params)

    state = torch.load(weights_file, map_location=util.device_mapping(cuda_device))
    model.load_state_dict(state)

    # Keep the embeddings and the rest of the parameters on the same device.
    if cuda_device < 0:
        model.cpu()
    else:
        model.cuda(cuda_device)
    return model
def _load(cls, config: Params, serialization_dir: str, weights_file: str = None, cuda_device: int = -1) -> 'Model':
    """
    Reconstruct a trained model from the files written during training.

    The experiment config drives the rebuild; the checkpoint supplies the
    learned parameters, including any originally pretrained embeddings.
    """
    checkpoint_path = weights_file or os.path.join(serialization_dir, _DEFAULT_WEIGHTS)
    vocab_dir = os.path.join(serialization_dir, 'vocabulary')

    # A custom vocabulary implementation may be requested via the config.
    vocab_params = config.get("vocabulary", Params({}))
    vocab_cls = Vocabulary.by_name(vocab_params.pop_choice("type", Vocabulary.list_available(), True))
    vocab = vocab_cls.from_files(vocab_dir)

    model_params = config.get('model')
    # The checkpoint already contains the trained embedding values, so stop
    # the model from looking for pretrained-embedding files on disk.
    remove_pretrained_embedding_params(model_params)
    model = Model.from_params(vocab=vocab, params=model_params)

    model.load_state_dict(torch.load(checkpoint_path, map_location=util.device_mapping(cuda_device)))

    # Ensure every parameter (embeddings included) lives on the target device.
    model.cuda(cuda_device) if cuda_device >= 0 else model.cpu()
    return model
def _load(config: Params, adapters_dir: str, serialization_dir: str, weights_file: str = None, cuda_device: int = -1) -> 'Model':
    """
    Instantiate an already-trained model and load per-layer adapter weights.

    Builds the model from ``config`` (vocabulary + architecture) and then, for
    every BERT encoder layer ``i``, loads the ``attention_adapter_<i>`` and
    ``output_adapter_<i>`` state dicts found in the files of ``adapters_dir``
    into that layer's adapter modules. The full-model state dict is
    intentionally NOT loaded here (the original load is commented out in the
    source) — only adapter weights are restored.

    Parameters
    ----------
    config : Params — the experiment configuration used at training time.
    adapters_dir : str — directory whose files each hold a torch-saved dict
        keyed by ``attention_adapter_<i>`` / ``output_adapter_<i>``.
    serialization_dir : str — training output dir ("best.th", "vocabulary").
    weights_file : str — optional path; defaults to best.th (currently unused
        beyond the default computation, see note above).
    cuda_device : int — CUDA device id, or -1 for CPU.

    Returns
    -------
    The adapted Model, or None when the model exposes no adapter modules.
    """
    weights_file = weights_file or os.path.join(serialization_dir, "best.th")

    # Restore the vocabulary, honoring a custom Vocabulary subclass if the
    # config names one.
    vocab_dir = os.path.join(serialization_dir, 'vocabulary')
    vocab_params = config.get("vocabulary", Params({}))
    vocab_choice = vocab_params.pop_choice("type", Vocabulary.list_available(), True)
    vocab = Vocabulary.by_name(vocab_choice).from_files(vocab_dir)

    model_params = config.get('model')
    # Embedding values live in the saved weights; prevent the constructor
    # from re-reading pretrained-embedding files that may no longer exist.
    remove_pretrained_embedding_params(model_params)
    model = Model.from_params(vocab=vocab, params=model_params)

    # Keep embedder shapes in sync with a possibly extended vocabulary before
    # any state dicts are loaded (no-op when nothing was extended).
    model.extend_embedder_vocab()

    # NOTE(review): os.listdir order is filesystem-dependent, so when more
    # than one adapter file is present the mapping of files onto a layer's
    # adapter modules is nondeterministic — confirm whether files should be
    # sorted or matched by name. Listing once also keeps the logged files and
    # the loaded files consistent (the original listed the directory twice).
    adapter_files = os.listdir(adapters_dir)
    for file in adapter_files:
        logger.info(f"{file} is loading..")

    # Deserialize each adapter file exactly once, instead of re-loading every
    # file for every encoder layer as the original did (N_layers x N_files
    # torch.load calls reduced to N_files).
    adapter_states = [torch.load(os.path.join(adapters_dir, f)) for f in adapter_files]

    for i, layer in enumerate(model.text_field_embedder.token_embedder_bert.bert_model.encoder.layer):
        try:
            # Pair each pre-loaded adapter file with the layer's adapter
            # modules; each file carries one state dict per layer index.
            for adapter_state, attention_adapter, output_attention in zip(
                    adapter_states, layer.attention.output.adapter, layer.output.adapter):
                attention_adapter.load_state_dict(adapter_state['attention_adapter_' + str(i)])
                output_attention.load_state_dict(adapter_state['output_adapter_' + str(i)])
        except AttributeError:
            # The model has no .adapter modules — log, dump the traceback,
            # and bail out (callers receive None, as in the original code).
            logger.warning(f"Could not find the adapter model inside the archive {adapters_dir}")
            traceback.print_exc()
            return None

    # Move the model (embeddings included) onto the requested device so it is
    # in sync with the loaded adapter weights.
    if cuda_device >= 0:
        model.cuda(cuda_device)
    else:
        model.cpu()
    return model