def from_params(self, params: Params) -> PytorchSeq2SeqWrapper:
    if not params.pop_bool('batch_first', True):
        raise ConfigurationError("Our encoder semantics assumes batch is always first!")
    if self._module_class in self.PYTORCH_MODELS:
        params['batch_first'] = True
    module = self._module_class(**params.as_dict())
    return PytorchSeq2SeqWrapper(module)
def from_params(self, params: Params) -> PytorchSeq2VecWrapper:
    if not params.pop("batch_first", True):
        raise ConfigurationError("Our encoder semantics assumes batch is always first!")
    if self._module_class in self.PYTORCH_MODELS:
        params["batch_first"] = True
    module = self._module_class(**params.as_dict(infer_type_and_cast=True))
    return PytorchSeq2VecWrapper(module)
def from_params(self, params: Params, **extras) -> PytorchSeq2SeqWrapper:
    if not params.pop_bool("batch_first", True):
        raise ConfigurationError("Our encoder semantics assumes batch is always first!")
    if self._module_class in self.PYTORCH_MODELS:
        params["batch_first"] = True
    stateful = params.pop_bool("stateful", False)
    module = self._module_class(**params.as_dict(infer_type_and_cast=True))
    return PytorchSeq2SeqWrapper(module, stateful=stateful)
def from_params(self, params: Params) -> PytorchSeq2SeqWrapper:
    if not params.pop_bool('batch_first', True):
        raise ConfigurationError("Our encoder semantics assumes batch is always first!")
    if self._module_class in self.PYTORCH_MODELS:
        params['batch_first'] = True
    module = self._module_class(**params.as_dict())
    return PytorchSeq2SeqWrapper(module, self._stateful)
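# Usage sketch (added for illustration; not part of the corpus above). It assumes
# these from_params methods live on a small factory object constructed around a
# concrete torch RNN class, as in older AllenNLP's `_Seq2SeqWrapper`; treat that
# class name and the import paths as assumptions about a legacy layout.
import torch
from allennlp.common import Params
from allennlp.modules.seq2seq_encoders import _Seq2SeqWrapper, PytorchSeq2SeqWrapper

factory = _Seq2SeqWrapper(torch.nn.LSTM)  # legacy factory wrapping nn.LSTM
encoder = factory.from_params(Params({"input_size": 5, "hidden_size": 7, "num_layers": 2}))
assert isinstance(encoder, PytorchSeq2SeqWrapper)  # batch_first was forced to True above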
def from_params(cls, params: Params):
    from allennlp.experiments.registry import Registry
    # TODO(Mark): The adaptive iterator will need a bit of work here,
    # to retrieve the scaling function etc.
    iterator_type = params.pop_choice("type", Registry.list_data_iterators())
    return Registry.get_data_iterator(iterator_type)(**params.as_dict())  # type: ignore
def write_config_to_file(filepath: str, config: Params) -> None:
    """Writes the config to a json file, specified by filepath."""
    with io.open(filepath, 'w', encoding='utf-8', errors='ignore') as fd:
        json.dump(fp=fd,
                  obj=config.as_dict(quiet=True),
                  ensure_ascii=False,
                  indent=4,
                  sort_keys=True)
def from_params(cls, model_parameters: List[torch.nn.Parameter], params: Params):
    if isinstance(params, str):
        optimizer = params
        params = Params({})
    else:
        optimizer = params.pop_choice("type", Optimizer.list_available())
    return Optimizer.by_name(optimizer)(model_parameters, **params.as_dict())  # type: ignore
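# Usage sketch (added): the two call shapes this factory accepts — a bare string
# naming the optimizer, or a nested config with a "type" key. It mirrors the
# Optimizer.from_params call in the Trainer code further down; the tiny model
# here is illustrative only.
import torch
from allennlp.common import Params
from allennlp.training.optimizers import Optimizer

model = torch.nn.Linear(5, 7)
parameters = [p for p in model.parameters() if p.requires_grad]
opt_from_string = Optimizer.from_params(parameters, "adam")
opt_from_config = Optimizer.from_params(parameters, Params({"type": "adam", "lr": 1e-3}))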
def get_predictor(predictor_name: str, params: Params, archive: str):
    cuda_device = params["trainer"]["cuda_device"]
    check_for_gpu(cuda_device)
    archive = load_archive(archive,
                           cuda_device=cuda_device,
                           overrides=json.dumps(params.as_dict()))
    predictor = Predictor.from_archive(archive, predictor_name)
    return predictor
def test_embedding_vocab_extension_raises_error_for_incorrect_vocab(self):
    # When the vocab namespace of the extension vocab is smaller than the
    # embeddings, it should raise a ConfigurationError.
    vocab = Vocabulary({"tokens": {"word1": 1, "word2": 1}})
    embedding_params = Params({"vocab_namespace": "tokens", "embedding_dim": 10})
    embedder = Embedding.from_vocab_or_file(vocab, **embedding_params.as_dict(quiet=True))
    with pytest.raises(ConfigurationError):
        embedder.extend_vocab(Vocabulary(), "tokens")
def test_read_embedding_file_inside_archive(self):
    token2vec = {
        "think": torch.Tensor([0.143, 0.189, 0.555, 0.361, 0.472]),
        "make": torch.Tensor([0.878, 0.651, 0.044, 0.264, 0.872]),
        "difference": torch.Tensor([0.053, 0.162, 0.671, 0.110, 0.259]),
        "àèìòù": torch.Tensor([1.0, 2.0, 3.0, 4.0, 5.0]),
    }
    vocab = Vocabulary()
    for token in token2vec:
        vocab.add_token_to_namespace(token)

    params = Params({
        "pretrained_file": str(self.FIXTURES_ROOT / "embeddings/multi-file-archive.zip"),
        "embedding_dim": 5,
    })
    with pytest.raises(
            ValueError,
            match="The archive .*/embeddings/multi-file-archive.zip contains multiple files, "
                  "so you must select one of the files inside "
                  "providing a uri of the type: "
                  "\\(path_or_url_to_archive\\)#path_inside_archive\\.",
    ):
        Embedding.from_vocab_or_file(vocab, **params.as_dict(quiet=True))

    for ext in [".zip", ".tar.gz"]:
        archive_path = str(self.FIXTURES_ROOT / "embeddings/multi-file-archive") + ext
        file_uri = format_embeddings_file_uri(archive_path, "folder/fake_embeddings.5d.txt")
        params = Params({"pretrained_file": file_uri, "embedding_dim": 5})
        embeddings = Embedding.from_vocab_or_file(vocab, **params.as_dict(quiet=True)).weight.data
        for tok, vec in token2vec.items():
            i = vocab.get_token_index(tok)
            assert torch.equal(embeddings[i], vec), "Problem with format " + archive_path
def from_params(cls, params: Params) -> 'RNNEncoder':
    module = params.pop('module').lower()
    if module == 'lstm':
        module_class = nn.LSTM
    elif module == 'gru':
        module_class = nn.GRU
    elif module == 'rnn':
        module_class = nn.RNN
    else:
        raise ConfigurationError("Unsupported module type")
    module = module_class(**params.as_dict())
    return RNNEncoder(module)
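# Usage sketch (added): constructing the encoder above from a config. `RNNEncoder`
# is the class defined in this snippet (not an AllenNLP built-in); every key other
# than "module" is forwarded verbatim to the chosen torch RNN constructor.
from allennlp.common import Params

encoder = RNNEncoder.from_params(Params({
    "module": "lstm",     # one of: lstm, gru, rnn
    "input_size": 5,
    "hidden_size": 7,
    "batch_first": True,  # forwarded to nn.LSTM
}))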
def from_params(self, params: Params) -> PytorchSeq2VecWrapper:
    if not params.pop('batch_first', True):
        raise ConfigurationError("Our encoder semantics assumes batch is always first!")
    if self._module_class in self.PYTORCH_MODELS:
        params['batch_first'] = True
    return_all_layers = params.pop('return_all_layers', False)
    return_all_hidden_states = params.pop('return_all_hidden_states', False)
    module = self._module_class(**params.as_dict())
    return PytorchSeq2VecWrapper(module,
                                 return_all_layers=return_all_layers,
                                 return_all_hidden_states=return_all_hidden_states)
def test_embedding_vocab_extension_works_with_pretrained_embedding_file(self):
    vocab = Vocabulary()
    vocab.add_token_to_namespace("word1")
    vocab.add_token_to_namespace("word2")

    embeddings_filename = str(self.TEST_DIR / "embeddings2.gz")
    with gzip.open(embeddings_filename, "wb") as embeddings_file:
        embeddings_file.write("word3 0.5 0.3 -6.0\n".encode("utf-8"))
        embeddings_file.write("word4 1.0 2.3 -1.0\n".encode("utf-8"))
        embeddings_file.write("word2 0.1 0.4 -4.0\n".encode("utf-8"))
        embeddings_file.write("word1 1.0 2.3 -1.0\n".encode("utf-8"))

    embedding_params = Params({
        "vocab_namespace": "tokens",
        "embedding_dim": 3,
        "pretrained_file": embeddings_filename,
    })
    embedder = Embedding.from_vocab_or_file(vocab, **embedding_params.as_dict(quiet=True))

    # Change weight to simulate embedding training
    embedder.weight.data += 1
    assert torch.all(embedder.weight[2:, :] == torch.Tensor([[2.0, 3.3, 0.0],
                                                             [1.1, 1.4, -3.0]]))
    original_weight = embedder.weight
    assert tuple(original_weight.size()) == (4, 3)  # 4 because of padding and OOV

    vocab.add_token_to_namespace("word3")
    embedder.extend_vocab(vocab, extension_pretrained_file=embeddings_filename)  # default namespace
    extended_weight = embedder.weight

    # Make sure extension happened for the extra token in the extended vocab.
    assert tuple(extended_weight.size()) == (5, 3)
    # Make sure extension doesn't change the original trained weights.
    assert torch.all(original_weight[:4, :] == extended_weight[:4, :])
    # Make sure the extended weight is taken from the embedding file.
    assert torch.all(extended_weight[4, :] == torch.Tensor([0.5, 0.3, -6.0]))
def test_read_hdf5_raises_on_invalid_shape(self):
    vocab = Vocabulary()
    vocab.add_token_to_namespace("word")
    embeddings_filename = str(self.TEST_DIR / "embeddings.hdf5")
    embeddings = numpy.random.rand(vocab.get_vocab_size(), 10)
    with h5py.File(embeddings_filename, "w") as fout:
        _ = fout.create_dataset("embedding", embeddings.shape, dtype="float32", data=embeddings)

    params = Params({"pretrained_file": embeddings_filename, "embedding_dim": 5})
    with pytest.raises(ConfigurationError):
        _ = Embedding.from_vocab_or_file(vocab, **params.as_dict(quiet=True))
def test_get_embedding_layer_initializes_unseen_words_randomly_not_zero(self):
    vocab = Vocabulary()
    vocab.add_token_to_namespace("word")
    vocab.add_token_to_namespace("word2")
    embeddings_filename = str(self.TEST_DIR / "embeddings.gz")
    with gzip.open(embeddings_filename, "wb") as embeddings_file:
        embeddings_file.write("word 1.0 2.3 -1.0\n".encode("utf-8"))

    params = Params({"pretrained_file": embeddings_filename, "embedding_dim": 3})
    embedding_layer = Embedding.from_vocab_or_file(vocab, **params.as_dict(quiet=True))
    word_vector = embedding_layer.weight.data[vocab.get_token_index("word2")]
    assert not numpy.allclose(word_vector.numpy(), numpy.array([0.0, 0.0, 0.0]))
def test_read_hdf5_format_file(self):
    vocab = Vocabulary()
    vocab.add_token_to_namespace("word")
    vocab.add_token_to_namespace("word2")
    embeddings_filename = str(self.TEST_DIR / "embeddings.hdf5")
    embeddings = numpy.random.rand(vocab.get_vocab_size(), 5)
    with h5py.File(embeddings_filename, "w") as fout:
        _ = fout.create_dataset("embedding", embeddings.shape, dtype="float32", data=embeddings)

    params = Params({"pretrained_file": embeddings_filename, "embedding_dim": 5})
    embedding_layer = Embedding.from_vocab_or_file(vocab, **params.as_dict(quiet=True))
    assert numpy.allclose(embedding_layer.weight.data.numpy(), embeddings)
def test_forward_works_with_projection_layer(self):
    vocab = Vocabulary()
    vocab.add_token_to_namespace("the")
    vocab.add_token_to_namespace("a")
    params = Params({
        "pretrained_file": str(self.FIXTURES_ROOT / "embeddings/glove.6B.300d.sample.txt.gz"),
        "embedding_dim": 300,
        "projection_dim": 20,
    })
    embedding_layer = Embedding.from_vocab_or_file(vocab, **params.as_dict(quiet=True))

    input_tensor = torch.LongTensor([[3, 2, 1, 0]])
    embedded = embedding_layer(input_tensor).data.numpy()
    assert embedded.shape == (1, 4, 20)

    input_tensor = torch.LongTensor([[[3, 2, 1, 0]]])
    embedded = embedding_layer(input_tensor).data.numpy()
    assert embedded.shape == (1, 1, 4, 20)
def test_embedding_vocab_extension_with_default_namespace(self):
    vocab = Vocabulary()
    vocab.add_token_to_namespace("word1")
    vocab.add_token_to_namespace("word2")
    embedding_params = Params({"vocab_namespace": "tokens", "embedding_dim": 10})
    embedder = Embedding.from_vocab_or_file(vocab, **embedding_params.as_dict(quiet=True))
    original_weight = embedder.weight
    assert original_weight.shape[0] == 4

    extension_counter = {"tokens": {"word3": 1}}
    vocab._extend(extension_counter)
    embedder.extend_vocab(vocab)  # default namespace
    extended_weight = embedder.weight

    assert extended_weight.shape[0] == 5
    assert torch.all(extended_weight[:4, :] == original_weight[:4, :])
def from_params(self, params: Params):
    transform = self._transform_class(**params.as_dict())
    return ImageTransform(transform)
class ArchivalTest(AllenNlpTestCase):
    def setUp(self):
        super().setUp()
        self.params = Params({
            "model": {
                "type": "simple_tagger",
                "text_field_embedder": {
                    "token_embedders": {
                        "tokens": {"type": "embedding", "embedding_dim": 5}
                    }
                },
                "encoder": {"type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2}
            },
            "dataset_reader": {"type": "sequence_tagging"},
            "train_data_path": str(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv'),
            "validation_data_path": str(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv'),
            "iterator": {"type": "basic", "batch_size": 2},
            "trainer": {"num_epochs": 2, "optimizer": "adam"}
        })

    def test_archiving(self):
        # copy params, since they'll get consumed during training
        params_copy = copy.deepcopy(self.params.as_dict())

        # `train_model` should create an archive
        serialization_dir = self.TEST_DIR / 'archive_test'
        model = train_model(self.params, serialization_dir=serialization_dir)
        archive_path = serialization_dir / "model.tar.gz"

        # load from the archive
        archive = load_archive(archive_path)
        model2 = archive.model
        assert_models_equal(model, model2)

        # check that params are the same
        params2 = archive.config
        assert params2.as_dict() == params_copy

    def test_archive_model_uses_archive_path(self):
        serialization_dir = self.TEST_DIR / 'serialization'
        # Train a model
        train_model(self.params, serialization_dir=serialization_dir)
        # Use a new path.
        archive_model(serialization_dir=serialization_dir,
                      archive_path=serialization_dir / "new_path.tar.gz")
        archive = load_archive(serialization_dir / 'new_path.tar.gz')
        assert archive

    def test_extra_files(self):
        serialization_dir = self.TEST_DIR / 'serialization'

        # Train a model
        train_model(self.params, serialization_dir=serialization_dir)

        # Archive model, and also archive the training data
        files_to_archive = {
            "train_data_path": str(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv')
        }
        archive_model(serialization_dir=serialization_dir, files_to_archive=files_to_archive)

        archive = load_archive(serialization_dir / 'model.tar.gz')
        params = archive.config

        # The param in the data should have been replaced with a temporary path
        # (which we don't know, but we know what it ends with).
        assert params.get('train_data_path').endswith('/fta/train_data_path')
        # The temporary path should be accessible even after the load_archive
        # function returns.
        assert os.path.exists(params.get('train_data_path'))
        # The validation data path should be the same though.
        assert params.get('validation_data_path') == str(
            self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv')

    def test_loading_serialization_directory(self):
        # copy params, since they'll get consumed during training
        params_copy = copy.deepcopy(self.params.as_dict())

        # `train_model` should create an archive
        serialization_dir = self.TEST_DIR / 'serialization'
        model = train_model(self.params, serialization_dir=serialization_dir)

        # load from the serialization directory itself
        archive = load_archive(serialization_dir)
        model2 = archive.model
        assert_models_equal(model, model2)

        # check that params are the same
        params2 = archive.config
        assert params2.as_dict() == params_copy

    def test_loading_serialization_directory_with_extra_files(self):
        serialization_dir = self.TEST_DIR / 'serialization'

        # Train a model
        train_model(self.params, serialization_dir=serialization_dir)

        # Archive model, and also archive the training data
        original_train_data_path = str(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv')
        files_to_archive = {"train_data_path": original_train_data_path}
        archive_model(serialization_dir=serialization_dir, files_to_archive=files_to_archive)

        archive = load_archive(serialization_dir)
        params = archive.config

        # We're loading from a directory, so retain the original path.
        assert params.get('train_data_path') == original_train_data_path
class ArchivalTest(AllenNlpTestCase):
    def setUp(self):
        super().setUp()
        self.params = Params({
            "model": {
                "type": "simple_tagger",
                "text_field_embedder": {
                    "tokens": {"type": "embedding", "embedding_dim": 5}
                },
                "encoder": {"type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2}
            },
            "dataset_reader": {"type": "sequence_tagging"},
            "train_data_path": str(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv'),
            "validation_data_path": str(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv'),
            "iterator": {"type": "basic", "batch_size": 2},
            "trainer": {"num_epochs": 2, "optimizer": "adam"}
        })

    def test_archiving(self):
        # copy params, since they'll get consumed during training
        params_copy = copy.deepcopy(self.params.as_dict())

        # `train_model` should create an archive
        serialization_dir = self.TEST_DIR / 'archive_test'
        model = train_model(self.params, serialization_dir=serialization_dir)
        archive_path = serialization_dir / "model.tar.gz"

        # load from the archive
        archive = load_archive(archive_path)
        model2 = archive.model

        # check that model weights are the same
        keys = set(model.state_dict().keys())
        keys2 = set(model2.state_dict().keys())
        assert keys == keys2

        for key in keys:
            assert torch.equal(model.state_dict()[key], model2.state_dict()[key])

        # check that vocabularies are the same
        vocab = model.vocab
        vocab2 = model2.vocab
        assert vocab._token_to_index == vocab2._token_to_index  # pylint: disable=protected-access
        assert vocab._index_to_token == vocab2._index_to_token  # pylint: disable=protected-access

        # check that params are the same
        params2 = archive.config
        assert params2.as_dict() == params_copy

    def test_extra_files(self):
        serialization_dir = self.TEST_DIR / 'serialization'

        # Train a model
        train_model(self.params, serialization_dir=serialization_dir)

        # Archive model, and also archive the training data
        files_to_archive = {"train_data_path": str(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv')}
        archive_model(serialization_dir=serialization_dir, files_to_archive=files_to_archive)

        archive = load_archive(serialization_dir / 'model.tar.gz')
        params = archive.config

        # The param in the data should have been replaced with a temporary path
        # (which we don't know, but we know what it ends with).
        assert params.get('train_data_path').endswith('/fta/train_data_path')
        # The validation data path should be the same though.
        assert params.get('validation_data_path') == str(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv')
class ArchivalTest(AllenNlpTestCase):
    def setUp(self):
        super().setUp()
        self.params = Params({
            "model": {
                "type": "simple_tagger",
                "text_field_embedder": {
                    "token_embedders": {
                        "tokens": {"type": "embedding", "embedding_dim": 5}
                    }
                },
                "encoder": {"type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2}
            },
            "dataset_reader": {"type": "sequence_tagging"},
            "train_data_path": str(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv'),
            "validation_data_path": str(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv'),
            "iterator": {"type": "basic", "batch_size": 2},
            "trainer": {"num_epochs": 2, "optimizer": "adam"}
        })

    def test_archiving(self):
        # copy params, since they'll get consumed during training
        params_copy = copy.deepcopy(self.params.as_dict())

        # `train_model` should create an archive
        serialization_dir = self.TEST_DIR / 'archive_test'
        model = train_model(self.params, serialization_dir=serialization_dir)
        archive_path = serialization_dir / "model.tar.gz"

        # load from the archive
        archive = load_archive(archive_path)
        model2 = archive.model

        # check that model weights are the same
        keys = set(model.state_dict().keys())
        keys2 = set(model2.state_dict().keys())
        assert keys == keys2

        for key in keys:
            assert torch.equal(model.state_dict()[key], model2.state_dict()[key])

        # check that vocabularies are the same
        vocab = model.vocab
        vocab2 = model2.vocab
        assert vocab._token_to_index == vocab2._token_to_index  # pylint: disable=protected-access
        assert vocab._index_to_token == vocab2._index_to_token  # pylint: disable=protected-access

        # check that params are the same
        params2 = archive.config
        assert params2.as_dict() == params_copy

    def test_extra_files(self):
        serialization_dir = self.TEST_DIR / 'serialization'

        # Train a model
        train_model(self.params, serialization_dir=serialization_dir)

        # Archive model, and also archive the training data
        files_to_archive = {
            "train_data_path": str(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv')
        }
        archive_model(serialization_dir=serialization_dir, files_to_archive=files_to_archive)

        archive = load_archive(serialization_dir / 'model.tar.gz')
        params = archive.config

        # The param in the data should have been replaced with a temporary path
        # (which we don't know, but we know what it ends with).
        assert params.get('train_data_path').endswith('/fta/train_data_path')
        # The temporary path should be accessible even after the load_archive
        # function returns.
        assert os.path.exists(params.get('train_data_path'))
        # The validation data path should be the same though.
        assert params.get('validation_data_path') == str(
            self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv')
def from_params(cls, model_parameters: List, params: Params):  # type: ignore
    # pylint: disable=arguments-differ
    if isinstance(params, str):
        optimizer = params
        params = Params({})
    else:
        optimizer = params.pop_choice("type", Optimizer.list_available())

    # make the parameter groups if needed
    groups = params.pop("parameter_groups", None)
    if groups:
        # The input to the optimizer is a list of dicts.
        # Each dict contains a "parameter group" and group-specific options,
        # e.g., {'params': [list of parameters], 'lr': 1e-3, ...}
        # Any config option not specified in the additional options (e.g.
        # for the default group) is inherited from the top level config.
        # see: https://pytorch.org/docs/0.3.0/optim.html?#per-parameter-options
        #
        # groups contains something like:
        # "parameter_groups": [
        #     [["regex1", "regex2"], {"lr": 1e-3}],
        #     [["regex3"], {"lr": 1e-4}]
        # ]
        # (note that the allennlp config files require double quotes ", and will
        # fail (sometimes silently) with single quotes ').
        # This is typed as Any since the dict values other than
        # the params key are passed to the Optimizer constructor and
        # can be any type it accepts.
        # In addition to any parameters that match group-specific regexes,
        # we also need a group for the remaining "default" parameters.
        # Those will be included in the last entry of parameter_groups.
        parameter_groups: Any = [{'params': []} for _ in range(len(groups) + 1)]
        # add the group specific kwargs
        for k in range(len(groups)):  # pylint: disable=consider-using-enumerate
            parameter_groups[k].update(groups[k][1].as_dict())

        regex_use_counts: Dict[str, int] = {}
        parameter_group_names: List[set] = [set() for _ in range(len(groups) + 1)]
        for name, param in model_parameters:
            # Determine the group for this parameter.
            group_index = None
            for k, group_regexes in enumerate(groups):
                for regex in group_regexes[0]:
                    if regex not in regex_use_counts:
                        regex_use_counts[regex] = 0
                    if re.search(regex, name):
                        if group_index is not None and group_index != k:
                            raise ValueError("{} was specified in two separate parameter groups".format(name))
                        group_index = k
                        regex_use_counts[regex] += 1

            if group_index is not None:
                parameter_groups[group_index]['params'].append(param)
                parameter_group_names[group_index].add(name)
            else:
                # the default group
                parameter_groups[-1]['params'].append(param)
                parameter_group_names[-1].add(name)

        # log the parameter groups
        logger.info("Done constructing parameter groups.")
        for k in range(len(groups) + 1):
            group_options = {key: val for key, val in parameter_groups[k].items() if key != 'params'}
            logger.info("Group %s: %s, %s", k, list(parameter_group_names[k]), group_options)
        # check for unused regexes
        for regex, count in regex_use_counts.items():
            if count == 0:
                logger.warning("When constructing parameter groups, "
                               "%s does not match any parameter name", regex)
    else:
        parameter_groups = [param for name, param in model_parameters]

    # Log the number of parameters to optimize
    num_parameters = 0
    for parameter_group in parameter_groups:
        if isinstance(parameter_group, dict):
            num_parameters += sum(parameter.numel() for parameter in parameter_group["params"])
        else:
            num_parameters += parameter_group.numel()
    logger.info("Number of trainable parameters: %s", num_parameters)

    # By default we cast things that e.g. look like floats to floats before handing them
    # to the Optimizer constructor, but if you want to disable that behavior you could add a
    #       "infer_type_and_cast": false
    # key to your "trainer.optimizer" config.
    infer_type_and_cast = params.pop_bool("infer_type_and_cast", True)
    params_as_dict = params.as_dict(infer_type_and_cast=infer_type_and_cast)
    subclass = Optimizer.by_name(optimizer)

    # If the optimizer subclass has a from_params, use it.
    if hasattr(subclass, 'from_params'):
        return subclass.from_params(parameter_groups, params=params)
    else:
        return subclass(parameter_groups, **params_as_dict)  # type: ignore
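# Config sketch (added): a "parameter_groups" entry of the shape described in the
# comments above. The regexes here are hypothetical parameter-name patterns;
# parameters matching "embedder" train at 1e-3, those matching "encoder" at 1e-4,
# and everything else lands in the implicit default group at the top-level lr.
from allennlp.common import Params

optimizer_params = Params({
    "type": "adam",
    "lr": 1e-2,  # inherited by the default group
    "parameter_groups": [
        [["embedder"], {"lr": 1e-3}],
        [["encoder"], {"lr": 1e-4}],
    ],
})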
def from_params(  # type: ignore
    cls,
    params: Params,
    serialization_dir: str,
    recover: bool = False,
    local_rank: int = 0,
) -> "Trainer":
    from allennlp.training.trainer import Trainer
    from allennlp.training.trainer_pieces import TrainerPieces

    config = dict(as_flat_dict(params.as_dict()))
    pieces = TrainerPieces.from_params(params, serialization_dir, recover)
    model = pieces.model
    iterator = pieces.iterator
    train_data = pieces.train_dataset
    validation_data = pieces.validation_dataset
    params = pieces.params
    validation_iterator = pieces.validation_iterator

    patience = params.pop_int("patience", None)
    validation_metric = params.pop("validation_metric", "-loss")
    shuffle = params.pop_bool("shuffle", True)
    num_epochs = params.pop_int("num_epochs", 20)
    cuda_device = parse_cuda_device(params.pop("cuda_device", -1))
    grad_norm = params.pop_float("grad_norm", None)
    grad_clipping = params.pop_float("grad_clipping", None)
    lr_scheduler_params = params.pop("learning_rate_scheduler", None)
    momentum_scheduler_params = params.pop("momentum_scheduler", None)

    check_for_gpu(cuda_device)
    if cuda_device >= 0:
        # Moving model to GPU here so that the optimizer state gets constructed on
        # the right device.
        model = model.cuda(cuda_device)

    parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
    optimizer = Optimizer.from_params(parameters, params.pop("optimizer"))
    if "moving_average" in params:
        moving_average = MovingAverage.from_params(
            params.pop("moving_average"), parameters=parameters
        )
    else:
        moving_average = None

    if lr_scheduler_params:
        lr_scheduler = LearningRateScheduler.from_params(optimizer, lr_scheduler_params)
    else:
        lr_scheduler = None
    if momentum_scheduler_params:
        momentum_scheduler = MomentumScheduler.from_params(optimizer, momentum_scheduler_params)
    else:
        momentum_scheduler = None

    if "checkpointer" in params:
        if (
            "keep_serialized_model_every_num_seconds" in params
            or "num_serialized_models_to_keep" in params
        ):
            raise ConfigurationError(
                "Checkpointer may be initialized either from the 'checkpointer' key or from the "
                "keys 'num_serialized_models_to_keep' and 'keep_serialized_model_every_num_seconds'"
                " but the passed config uses both methods."
            )
        checkpointer = Checkpointer.from_params(params.pop("checkpointer"))
    else:
        num_serialized_models_to_keep = params.pop_int("num_serialized_models_to_keep", 20)
        keep_serialized_model_every_num_seconds = params.pop_int(
            "keep_serialized_model_every_num_seconds", None
        )
        checkpointer = Checkpointer(
            serialization_dir=serialization_dir,
            num_serialized_models_to_keep=num_serialized_models_to_keep,
            keep_serialized_model_every_num_seconds=keep_serialized_model_every_num_seconds,
        )

    model_save_interval = params.pop_float("model_save_interval", None)
    summary_interval = params.pop_int("summary_interval", 100)
    histogram_interval = params.pop_int("histogram_interval", None)
    should_log_parameter_statistics = params.pop_bool("should_log_parameter_statistics", True)
    should_log_learning_rate = params.pop_bool("should_log_learning_rate", False)
    log_batch_size_period = params.pop_int("log_batch_size_period", None)

    distributed = params.pop_bool("distributed", False)
    world_size = params.pop_int("world_size", 1)

    num_gradient_accumulation_steps = params.pop("num_gradient_accumulation_steps", 1)
    lang_mean_dir = params.pop("ft_lang_mean_dir", None)
    if lang_mean_dir:
        try:
            assert model._lang_means is not None
            lang_mean = get_lang_mean(lang_mean_dir)
            model.add_ft_lang_mean_to_lang_means(lang_mean)
        except (AttributeError, AssertionError):
            pass

    writer = None
    wandb_config = params.pop("wandb", None)
    if wandb_config is not None:
        writer = WandBWriter(config, model, wandb_config)

    params.assert_empty(cls.__name__)
    return cls(
        model,
        optimizer,
        iterator,
        train_data,
        validation_data,
        patience=patience,
        validation_metric=validation_metric,
        validation_iterator=validation_iterator,
        shuffle=shuffle,
        num_epochs=num_epochs,
        serialization_dir=serialization_dir,
        cuda_device=cuda_device,
        grad_norm=grad_norm,
        grad_clipping=grad_clipping,
        learning_rate_scheduler=lr_scheduler,
        momentum_scheduler=momentum_scheduler,
        checkpointer=checkpointer,
        model_save_interval=model_save_interval,
        summary_interval=summary_interval,
        histogram_interval=histogram_interval,
        should_log_parameter_statistics=should_log_parameter_statistics,
        should_log_learning_rate=should_log_learning_rate,
        log_batch_size_period=log_batch_size_period,
        moving_average=moving_average,
        distributed=distributed,
        local_rank=local_rank,
        world_size=world_size,
        num_gradient_accumulation_steps=num_gradient_accumulation_steps,
        writer=writer,
    )
class ArchivalTest(AllenNlpTestCase):
    def setUp(self):
        super().setUp()
        self.params = Params({
            "model": {
                "type": "simple_tagger",
                "text_field_embedder": {
                    "token_embedders": {
                        "tokens": {"type": "embedding", "embedding_dim": 5}
                    }
                },
                "encoder": {"type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2},
            },
            "dataset_reader": {"type": "sequence_tagging"},
            "train_data_path": str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"),
            "validation_data_path": str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"),
            "iterator": {"type": "basic", "batch_size": 2},
            "trainer": {"num_epochs": 2, "optimizer": "adam"},
        })

    def test_archiving(self):
        # copy params, since they'll get consumed during training
        params_copy = copy.deepcopy(self.params.as_dict())

        # `train_model` should create an archive
        serialization_dir = self.TEST_DIR / "archive_test"
        model = train_model(self.params, serialization_dir=serialization_dir)
        archive_path = serialization_dir / "model.tar.gz"

        # load from the archive
        archive = load_archive(archive_path)
        model2 = archive.model
        assert_models_equal(model, model2)

        # check that params are the same
        params2 = archive.config
        assert params2.as_dict() == params_copy

    def test_archive_model_uses_archive_path(self):
        serialization_dir = self.TEST_DIR / "serialization"
        # Train a model
        train_model(self.params, serialization_dir=serialization_dir)
        # Use a new path.
        archive_model(serialization_dir=serialization_dir,
                      archive_path=serialization_dir / "new_path.tar.gz")
        archive = load_archive(serialization_dir / "new_path.tar.gz")
        assert archive

    def test_loading_serialization_directory(self):
        # copy params, since they'll get consumed during training
        params_copy = copy.deepcopy(self.params.as_dict())

        # `train_model` should create an archive
        serialization_dir = self.TEST_DIR / "serialization"
        model = train_model(self.params, serialization_dir=serialization_dir)

        # load from the serialization directory itself
        archive = load_archive(serialization_dir)
        model2 = archive.model
        assert_models_equal(model, model2)

        # check that params are the same
        params2 = archive.config
        assert params2.as_dict() == params_copy
def test_archiving(self):
    super(ArchivalTest, self).setUp()
    params = Params({
        "model": {
            "type": "simple_tagger",
            "text_field_embedder": {
                "tokens": {"type": "embedding", "embedding_dim": 5}
            },
            "stacked_encoder": {"type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2}
        },
        "dataset_reader": {"type": "sequence_tagging"},
        "train_data_path": 'tests/fixtures/data/sequence_tagging.tsv',
        "validation_data_path": 'tests/fixtures/data/sequence_tagging.tsv',
        "iterator": {"type": "basic", "batch_size": 2},
        "trainer": {"num_epochs": 2, "optimizer": "adam"}
    })

    # copy params, since they'll get consumed during training
    params_copy = copy.deepcopy(params.as_dict())

    # `train_model` should create an archive
    model = train_model(params, serialization_dir=self.TEST_DIR)
    archive_path = os.path.join(self.TEST_DIR, "model.tar.gz")

    # load from the archive
    archive = load_archive(archive_path)
    model2 = archive.model

    # check that model weights are the same
    keys = set(model.state_dict().keys())
    keys2 = set(model2.state_dict().keys())
    assert keys == keys2

    for key in keys:
        assert torch.equal(model.state_dict()[key], model2.state_dict()[key])

    # check that vocabularies are the same
    vocab = model.vocab
    vocab2 = model2.vocab
    assert vocab._token_to_index == vocab2._token_to_index  # pylint: disable=protected-access
    assert vocab._index_to_token == vocab2._index_to_token  # pylint: disable=protected-access

    # check that params are the same
    params2 = archive.config
    assert params2.as_dict() == params_copy
def from_params(cls, model_parameters: List, params: Params):  # type: ignore
    # pylint: disable=arguments-differ
    if isinstance(params, str):
        optimizer = params
        params = Params({})
    else:
        optimizer = params.pop_choice("type", Optimizer.list_available())

    # make the parameter groups if needed
    groups = params.pop("parameter_groups", None)
    if groups:
        # The input to the optimizer is a list of dicts.
        # Each dict contains a "parameter group" and group-specific options,
        # e.g., {'params': [list of parameters], 'lr': 1e-3, ...}
        # Any config option not specified in the additional options (e.g.
        # for the default group) is inherited from the top level config.
        # see: http://pytorch.org/docs/0.3.0/optim.html?#per-parameter-options
        #
        # groups contains something like:
        # "parameter_groups": [
        #     [["regex1", "regex2"], {"lr": 1e-3}],
        #     [["regex3"], {"lr": 1e-4}]
        # ]
        # (note that the allennlp config files require double quotes ", and will
        # fail (sometimes silently) with single quotes ').
        # This is typed as Any since the dict values other than
        # the params key are passed to the Optimizer constructor and
        # can be any type it accepts.
        # In addition to any parameters that match group-specific regexes,
        # we also need a group for the remaining "default" parameters.
        # Those will be included in the last entry of parameter_groups.
        parameter_groups: Any = [{'params': []} for _ in range(len(groups) + 1)]
        # add the group specific kwargs
        for k in range(len(groups)):  # pylint: disable=consider-using-enumerate
            parameter_groups[k].update(groups[k][1].as_dict())

        regex_use_counts: Dict[str, int] = {}
        parameter_group_names: List[set] = [set() for _ in range(len(groups) + 1)]
        for name, param in model_parameters:
            # Determine the group for this parameter.
            group_index = None
            for k, group_regexes in enumerate(groups):
                for regex in group_regexes[0]:
                    if regex not in regex_use_counts:
                        regex_use_counts[regex] = 0
                    if re.search(regex, name):
                        if group_index is not None and group_index != k:
                            raise ValueError("{} was specified in two separate parameter groups".format(name))
                        group_index = k
                        regex_use_counts[regex] += 1

            if group_index is not None:
                parameter_groups[group_index]['params'].append(param)
                parameter_group_names[group_index].add(name)
            else:
                # the default group
                parameter_groups[-1]['params'].append(param)
                parameter_group_names[-1].add(name)

        # log the parameter groups
        logger.info("Done constructing parameter groups.")
        for k in range(len(groups) + 1):
            group_options = {key: val for key, val in parameter_groups[k].items() if key != 'params'}
            logger.info("Group %s: %s, %s", k, list(parameter_group_names[k]), group_options)
        # check for unused regexes
        for regex, count in regex_use_counts.items():
            if count == 0:
                logger.warning("When constructing parameter groups, "
                               "%s does not match any parameter name", regex)
    else:
        parameter_groups = [param for name, param in model_parameters]

    # Log the number of parameters to optimize
    num_parameters = 0
    for parameter_group in parameter_groups:
        if isinstance(parameter_group, dict):
            num_parameters += sum(parameter.numel() for parameter in parameter_group["params"])
        else:
            num_parameters += parameter_group.numel()
    logger.info("Number of trainable parameters: %s", num_parameters)

    # By default we cast things that e.g. look like floats to floats before handing them
    # to the Optimizer constructor, but if you want to disable that behavior you could add a
    #       "infer_type_and_cast": false
    # key to your "trainer.optimizer" config.
    infer_type_and_cast = params.pop_bool("infer_type_and_cast", True)
    params_as_dict = params.as_dict(infer_type_and_cast=infer_type_and_cast)
    return Optimizer.by_name(optimizer)(parameter_groups, **params_as_dict)  # type: ignore
def from_params(self, params: Params) -> CrossValidationSplitter:
    generate_validation_sets = params.pop_bool('generate_validation_sets', False)
    cross_validator = self.cross_validator_class(**params.as_dict())
    return CrossValidationSplitter(cross_validator,
                                   generate_validation_sets=generate_validation_sets)
def make_files_for_official_eval(model_archive_file, evaluation_files, output_file, cuda_device):
    archive = load_archive(model_archive_file)
    model = archive.model
    model.eval()

    if cuda_device != -1:
        model.cuda(cuda_device)

    def find_key(d, func):
        # Depth-first search for the sub-dict on which `func(key, value)` holds.
        ret = None
        stack = [d]
        while len(stack) > 0 and ret is None:
            s = stack.pop()
            for k, v in s.items():
                if func(k, v):
                    ret = s
                    break
                elif isinstance(v, dict):
                    stack.append(v)
        return ret

    # load reader
    full_reader_params = copy.deepcopy(archive.config['dataset_reader'].as_dict())
    reader_params = find_key(full_reader_params,
                             lambda k, v: k == 'type' and v == 'wordnet_fine_grained')
    reader_params['is_training'] = False
    reader_params['should_remap_span_indices'] = True

    if 'extra_candidate_generators' in reader_params:
        candidate_generator_params = find_key(
            full_reader_params,
            lambda k, v: k == 'tokenizer_and_candidate_generator'
        )['tokenizer_and_candidate_generator']
        candidate_generator = TokenizerAndCandidateGenerator.from_params(
            Params(candidate_generator_params)
        )

    reader_params = Params(reader_params)
    print("====================")
    print(reader_params.as_dict())
    print("====================")
    reader = DatasetReader.from_params(reader_params)

    synset_to_lemmas = {}
    for lemma_id, synset_id in reader.mention_generator._lemma_to_synset.items():
        if synset_id not in synset_to_lemmas:
            synset_to_lemmas[synset_id] = []
        synset_to_lemmas[synset_id].append(lemma_id)

    vocab_params = archive.config['vocabulary']
    vocab = Vocabulary.from_params(vocab_params)

    iterator = BasicIterator(batch_size=24)
    iterator.index_with(vocab)

    fout = open(output_file, 'w')

    # NOTE: the original flattened code iterated over an undefined name
    # `evaluation_file`; the function parameter `evaluation_files` is used here.
    for ds_file in [evaluation_files]:
        instances = reader.read(ds_file)

        # get the metadata ids from the raw file
        raw_lines = []
        with JsonFile(ds_file, 'r') as fin:
            for sentence in fin:
                raw_ids = [[token['id'], token['lemma']]
                           for token in sentence if 'senses' in token]
                if len(raw_ids) > 0:
                    raw_lines.append(raw_ids)

        raw_i = 0
        for batch in iterator(instances, num_epochs=1, shuffle=False):
            print(raw_i)
            if cuda_device > -1:
                b = move_to_device(batch, cuda_device)
            else:
                b = batch

            b['candidates'] = {'wordnet': {
                'candidate_entities': b.pop('candidate_entities'),
                'candidate_entity_priors': b.pop('candidate_entity_prior'),
                'candidate_segment_ids': b.pop('candidate_segment_ids'),
                'candidate_spans': b.pop('candidate_spans')}}
            gold_entities = b.pop('gold_entities')
            b['gold_entities'] = {'wordnet': gold_entities}

            if 'extra_candidates' in b:
                extra_candidates = b.pop('extra_candidates')
                seq_len = b['tokens']['tokens'].shape[1]
                bbb = []
                for e in extra_candidates:
                    for k in e.keys():
                        e[k]['candidate_segment_ids'] = [0] * len(e[k]['candidate_spans'])
                    ee = {'tokens': ['[CLS]'] * seq_len,
                          'segment_ids': [0] * seq_len,
                          'candidates': e}
                    ee_fields = candidate_generator.convert_tokens_candidates_to_fields(ee)
                    bbb.append(Instance(ee_fields))
                eb = Batch(bbb)
                eb.index_instances(vocab)
                padding_lengths = eb.get_padding_lengths()
                tensor_dict = eb.as_tensor_dict(padding_lengths)
                b['candidates'].update(tensor_dict['candidates'])
                if cuda_device > -1:
                    b = move_to_device(b, cuda_device)

            output = model(**b)

            # predicted entities is a list of (batch_index, (start, end), entity_id)
            predicted_entities = model.soldered_kgs['wordnet'].entity_linker._decode(
                output['wordnet']['linking_scores'],
                b['candidates']['wordnet']['candidate_spans'],
                b['candidates']['wordnet']['candidate_entities']['ids']
            )

            # make output file
            predicted_entities_batch_indices = []
            batch_size = batch['tokens']['tokens'].shape[0]
            for k in range(batch_size):
                predicted_entities_batch_indices.append([])
            for b_index, start_end, eid in predicted_entities:
                try:
                    synset_id = vocab.get_token_from_index(eid, 'entity')
                except KeyError:
                    synset_id = vocab.get_token_from_index(eid, 'entity_wordnet')
                all_lemma_ids = synset_to_lemmas[synset_id]
                predicted_entities_batch_indices[b_index].append(all_lemma_ids)

            # output lines look like: semeval2013.d000.s001.t003 reader%1:19:00::
            for k in range(batch_size):
                raw_ids = raw_lines[raw_i]
                predicted_lemmas = predicted_entities_batch_indices[k]
                assert len(predicted_lemmas) == len(raw_ids)
                for (ii, gold_lemma), pl in zip(raw_ids, predicted_lemmas):
                    # get the predicted lemma_id
                    predicted_lemma_id = None
                    for pp in pl:
                        if pp.partition('%')[0] == gold_lemma:
                            predicted_lemma_id = pp
                    assert predicted_lemma_id is not None
                    line = "{} {}\n".format(ii, predicted_lemma_id)
                    fout.write(line)
                raw_i += 1

    fout.close()
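# Invocation sketch (added): how the evaluation script above might be called.
# The file paths are placeholders, and cuda_device=-1 keeps everything on CPU.
make_files_for_official_eval(
    model_archive_file="knowbert_wordnet_model.tar.gz",
    evaluation_files="semeval2013_wsd.json",
    output_file="wsd_predictions.txt",
    cuda_device=-1,
)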
def from_params(cls, model_parameters: List, params: Params):
    if isinstance(params, str):
        optimizer = params
        params = Params({})
    else:
        optimizer = params.pop_choice("type", Optimizer.list_available())

    # make the parameter groups if needed
    groups = params.pop("parameter_groups", None)
    if groups:
        # The input to the optimizer is a list of dicts, where each dict contains
        # {'params': [list of parameters], 'lr': 1e-3, ...}
        # Any config option not specified in the additional options (e.g.
        # for the default group) is inherited from the top level config.
        # see: http://pytorch.org/docs/0.3.0/optim.html?#per-parameter-options
        #
        # groups contains something like:
        # "parameter_groups": [
        #     [['regex1', 'regex2'], {'lr': 1e-3}],
        #     [['regex3'], {'lr': 1e-4}]
        # ]
        #
        # The last entry of this list is for the parameters not in any regex.
        #
        # This is typed as Any since the dict values other than
        # the params key are passed to the Optimizer constructor and
        # can be any type it accepts.
        parameter_groups: Any = [{'params': []} for _ in range(len(groups) + 1)]
        # add the group specific kwargs
        for k in range(len(groups)):  # pylint: disable=consider-using-enumerate
            parameter_groups[k].update(groups[k][1].as_dict())

        regex_use_counts: Dict[str, int] = {}
        parameter_group_names: List[set] = [set() for _ in range(len(groups) + 1)]
        for name, param in model_parameters:
            # Determine the group for this parameter.
            group_index = None
            for k, group_regexes in enumerate(groups):
                for regex in group_regexes[0]:
                    if regex not in regex_use_counts:
                        regex_use_counts[regex] = 0
                    if re.search(regex, name):
                        if group_index is not None and group_index != k:
                            raise ValueError(
                                "{} was specified in two separate parameter groups".format(name))
                        group_index = k
                        regex_use_counts[regex] += 1

            if group_index is not None:
                parameter_groups[group_index]['params'].append(param)
                parameter_group_names[group_index].add(name)
            else:
                # the default group
                parameter_groups[-1]['params'].append(param)
                parameter_group_names[-1].add(name)

        # log the parameter groups
        logger.info("Done constructing parameter groups.")
        for k in range(len(groups) + 1):
            group_options = {key: val for key, val in parameter_groups[k].items()
                             if key != 'params'}
            print("Group {0}: {1}, {2}".format(k, list(parameter_group_names[k]), group_options))
        # check for unused regexes
        for regex, count in regex_use_counts.items():
            if count == 0:
                logger.warning("When constructing parameter groups, "
                               "%s does not match any parameter name", regex)
    else:
        parameter_groups = [param for name, param in model_parameters]

    return Optimizer.by_name(optimizer)(parameter_groups, **params.as_dict())  # type: ignore
def from_params(cls, model_parameters, params):  # type: ignore
    # pylint: disable=arguments-differ
    if isinstance(params, unicode):
        optimizer = params
        params = Params({})
    else:
        optimizer = params.pop_choice(u"type", Optimizer.list_available())

    # make the parameter groups if needed
    groups = params.pop(u"parameter_groups", None)
    if groups:
        # The input to the optimizer is a list of dicts.
        # Each dict contains a "parameter group" and group-specific options,
        # e.g., {'params': [list of parameters], 'lr': 1e-3, ...}
        # Any config option not specified in the additional options (e.g.
        # for the default group) is inherited from the top level config.
        # see: http://pytorch.org/docs/0.3.0/optim.html?#per-parameter-options
        #
        # groups contains something like:
        # "parameter_groups": [
        #     [["regex1", "regex2"], {"lr": 1e-3}],
        #     [["regex3"], {"lr": 1e-4}]
        # ]
        # (note that the allennlp config files require double quotes ", and will
        # fail (sometimes silently) with single quotes ').
        # This is typed as Any since the dict values other than
        # the params key are passed to the Optimizer constructor and
        # can be any type it accepts.
        # In addition to any parameters that match group-specific regexes,
        # we also need a group for the remaining "default" parameters.
        # Those will be included in the last entry of parameter_groups.
        parameter_groups = [{u'params': []} for _ in range(len(groups) + 1)]
        # add the group specific kwargs
        for k in range(len(groups)):  # pylint: disable=consider-using-enumerate
            parameter_groups[k].update(groups[k][1].as_dict())

        regex_use_counts = {}
        parameter_group_names = [set() for _ in range(len(groups) + 1)]
        for name, param in model_parameters:
            # Determine the group for this parameter.
            group_index = None
            for k, group_regexes in enumerate(groups):
                for regex in group_regexes[0]:
                    if regex not in regex_use_counts:
                        regex_use_counts[regex] = 0
                    if re.search(regex, name):
                        if group_index is not None and group_index != k:
                            raise ValueError(u"{} was specified in two separate parameter groups".format(name))
                        group_index = k
                        regex_use_counts[regex] += 1

            if group_index is not None:
                parameter_groups[group_index][u'params'].append(param)
                parameter_group_names[group_index].add(name)
            else:
                # the default group
                parameter_groups[-1][u'params'].append(param)
                parameter_group_names[-1].add(name)

        # log the parameter groups
        logger.info(u"Done constructing parameter groups.")
        for k in range(len(groups) + 1):
            group_options = dict((key, val) for key, val in list(parameter_groups[k].items())
                                 if key != u'params')
            logger.info(u"Group %s: %s, %s", k, list(parameter_group_names[k]), group_options)
        # check for unused regexes
        for regex, count in list(regex_use_counts.items()):
            if count == 0:
                logger.warning(u"When constructing parameter groups, "
                               u"%s does not match any parameter name", regex)
    else:
        parameter_groups = [param for name, param in model_parameters]

    # Log the number of parameters to optimize
    num_parameters = 0
    for parameter_group in parameter_groups:
        if isinstance(parameter_group, dict):
            num_parameters += sum(parameter.numel() for parameter in parameter_group[u"params"])
        else:
            num_parameters += parameter_group.numel()
    logger.info(u"Number of trainable parameters: %s", num_parameters)

    return Optimizer.by_name(optimizer)(parameter_groups, **params.as_dict())  # type: ignore
class ArchivalTest(AllenNlpTestCase):
    def setup_method(self):
        super().setup_method()
        self.params = Params({
            "model": {
                "type": "simple_tagger",
                "text_field_embedder": {
                    "token_embedders": {
                        "tokens": {"type": "embedding", "embedding_dim": 5}
                    }
                },
                "encoder": {"type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2},
            },
            "dataset_reader": {"type": "sequence_tagging"},
            "train_data_path": str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"),
            "validation_data_path": str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"),
            "data_loader": {"batch_size": 2},
            "trainer": {"num_epochs": 2, "optimizer": "adam"},
        })

    def test_archiving(self):
        # copy params, since they'll get consumed during training
        params_copy = copy.deepcopy(self.params.as_dict())

        # `train_model` should create an archive
        serialization_dir = self.TEST_DIR / "archive_test"
        model = train_model(self.params, serialization_dir=serialization_dir)
        archive_path = serialization_dir / "model.tar.gz"

        # load from the archive
        archive = load_archive(archive_path)
        model2 = archive.model
        assert_models_equal(model, model2)

        # check that params are the same
        params2 = archive.config
        assert params2.as_dict() == params_copy

    def test_archive_model_uses_archive_path(self):
        serialization_dir = self.TEST_DIR / "serialization"
        # Train a model
        train_model(self.params, serialization_dir=serialization_dir)
        # Use a new path.
        archive_model(serialization_dir=serialization_dir,
                      archive_path=serialization_dir / "new_path.tar.gz")
        archive = load_archive(serialization_dir / "new_path.tar.gz")
        assert archive

    def test_loading_serialization_directory(self):
        # copy params, since they'll get consumed during training
        params_copy = copy.deepcopy(self.params.as_dict())

        # `train_model` should create an archive
        serialization_dir = self.TEST_DIR / "serialization"
        model = train_model(self.params, serialization_dir=serialization_dir)

        # load from the serialization directory itself
        archive = load_archive(serialization_dir)
        model2 = archive.model
        assert_models_equal(model, model2)

        # check that params are the same
        params2 = archive.config
        assert params2.as_dict() == params_copy

    def test_can_load_from_archive_model(self):
        serialization_dir = self.FIXTURES_ROOT / "basic_classifier" / "from_archive_serialization"
        archive_path = serialization_dir / "model.tar.gz"
        model = load_archive(archive_path).model

        # We want to be sure that we don't just not crash, but also be sure that we loaded the right
        # weights for the model. We'll do that by making sure that we didn't just load the model
        # that's in the `archive_path` of the config file, which is this one.
        base_model_path = self.FIXTURES_ROOT / "basic_classifier" / "serialization" / "model.tar.gz"
        base_model = load_archive(base_model_path).model
        base_model_params = dict(base_model.named_parameters())
        for name, parameters in model.named_parameters():
            if parameters.size() == base_model_params[name].size():
                assert not (parameters == base_model_params[name]).all()
            else:
                # In this case, the parameters are definitely different, no need for the above
                # check.
                pass
class ArchivalTest(AllenNlpTestCase):
    def setup_method(self):
        super().setup_method()
        self.params = Params(
            {
                "model": {
                    "type": "simple_tagger",
                    "text_field_embedder": {
                        "token_embedders": {"tokens": {"type": "embedding", "embedding_dim": 5}}
                    },
                    "encoder": {"type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2},
                },
                "dataset_reader": {"type": "sequence_tagging"},
                "train_data_path": str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"),
                "validation_data_path": str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"),
                "data_loader": {"batch_size": 2},
                "trainer": {"num_epochs": 2, "optimizer": "adam", "cuda_device": -1},
            }
        )

    def test_archiving(self):
        # copy params, since they'll get consumed during training
        params_copy = self.params.duplicate()
        params_dict_copy = copy.deepcopy(self.params.as_dict())

        # `train_model` should create an archive
        serialization_dir = self.TEST_DIR / "archive_test"
        model = train_model(self.params, serialization_dir=serialization_dir)
        archive_path = serialization_dir / "model.tar.gz"

        # load from the archive
        archive = load_archive(archive_path)
        model2 = archive.model
        assert_models_equal(model, model2)

        assert isinstance(
            archive.dataset_reader,
            type(DatasetReader.from_params(params_copy["dataset_reader"].duplicate())),
        )
        assert isinstance(
            archive.validation_dataset_reader,
            type(DatasetReader.from_params(params_copy["dataset_reader"].duplicate())),
        )  # validation_dataset_reader is not in the config, so fall back to dataset_reader

        # check that params are the same
        params2 = archive.config
        assert params2.as_dict() == params_dict_copy

    def test_archive_model_uses_archive_path(self):
        serialization_dir = self.TEST_DIR / "serialization"
        # Train a model
        train_model(self.params, serialization_dir=serialization_dir)
        # Use a new path.
        archive_model(
            serialization_dir=serialization_dir, archive_path=serialization_dir / "new_path.tar.gz"
        )
        archive = load_archive(serialization_dir / "new_path.tar.gz")
        assert archive

    def test_loading_serialization_directory(self):
        # copy params, since they'll get consumed during training
        params_dict_copy = copy.deepcopy(self.params.as_dict())

        # `train_model` should create an archive
        serialization_dir = self.TEST_DIR / "serialization"
        model = train_model(self.params, serialization_dir=serialization_dir)

        # load from the serialization directory itself
        archive = load_archive(serialization_dir)
        model2 = archive.model
        assert_models_equal(model, model2)

        # check that params are the same
        params2 = archive.config
        assert params2.as_dict() == params_dict_copy

    def test_can_load_from_archive_model(self):
        serialization_dir = self.FIXTURES_ROOT / "basic_classifier" / "from_archive_serialization"
        archive_path = serialization_dir / "model.tar.gz"
        model = load_archive(archive_path).model

        # We want to be sure that we don't just not crash, but also be sure that we loaded the right
        # weights for the model. We'll do that by making sure that we didn't just load the model
        # that's in the `archive_path` of the config file, which is this one.
        base_model_path = self.FIXTURES_ROOT / "basic_classifier" / "serialization" / "model.tar.gz"
        base_model = load_archive(base_model_path).model
        base_model_params = dict(base_model.named_parameters())
        for name, parameters in model.named_parameters():
            if parameters.size() == base_model_params[name].size():
                assert not (parameters == base_model_params[name]).all()
            else:
                # In this case, the parameters are definitely different, no need for the above
                # check.
                pass

    def test_include_in_archive(self):
        self.params["include_in_archive"] = ["metrics_epoch_*.json"]

        serialization_dir = self.TEST_DIR / "serialization"
        # Train a model
        train_model(self.params, serialization_dir=serialization_dir)

        # Assert that the additional targets were archived
        with tempfile.TemporaryDirectory() as tempdir:
            with tarfile.open(serialization_dir / "model.tar.gz", "r:gz") as archive:
                archive.extractall(tempdir)
            assert os.path.isfile(os.path.join(tempdir, "metrics_epoch_0.json"))
            assert os.path.isfile(os.path.join(tempdir, "metrics_epoch_1.json"))
            assert not os.path.isfile(os.path.join(tempdir, "metrics.json"))

    def test_invalid_include_in_archive(self):
        self.params["include_in_archive"] = [CONFIG_NAME]

        serialization_dir = self.TEST_DIR / "serialization"

        with pytest.raises(ConfigurationError) as exc:
            train_model(self.params, serialization_dir=serialization_dir)
        assert "are saved names and cannot be used" in str(exc.value)