def from_params(self, params: Params) -> PytorchSeq2VecWrapper:
    """
    Build the wrapped module from ``params`` and return it inside a
    ``PytorchSeq2VecWrapper``.

    ``batch_first`` is popped from ``params`` (defaulting to ``True``); a falsy
    value is rejected with a ``ConfigurationError``. When the module class is
    listed in ``self.PYTORCH_MODELS``, ``batch_first=True`` is written back into
    ``params`` before the remaining entries are passed to the module
    constructor as keyword arguments.
    """
    batch_first = params.pop('batch_first', True)
    if not batch_first:
        raise ConfigurationError("Our encoder semantics assumes batch is always first!")

    module_class = self._module_class
    if module_class in self.PYTORCH_MODELS:
        # These modules take the flag explicitly, so hand it back to them.
        params['batch_first'] = True

    constructor_kwargs = params.as_dict()
    return PytorchSeq2VecWrapper(module_class(**constructor_kwargs))
def from_params(cls, model_parameters: List, params: Params):  # type: ignore
    # pylint: disable=arguments-differ
    """
    Construct the optimizer described by ``params`` over ``model_parameters``.

    Parameters
    ----------
    model_parameters : ``List``
        ``(name, parameter)`` pairs. The names are matched (with ``re.search``)
        against the regexes of each configured parameter group.
    params : ``Params`` or ``str``
        Either the bare optimizer name, or a configuration holding a ``"type"``
        key, an optional ``"parameter_groups"`` key, and any further options,
        which are forwarded to the optimizer constructor as keyword arguments.

    Returns
    -------
    Whatever ``Optimizer.by_name(<type>)`` returns when called with the
    parameter groups and the remaining options.

    Raises
    ------
    ValueError
        If a parameter name matches regexes from two different groups.
    """
    if isinstance(params, str):
        optimizer = params
        params = Params({})
    else:
        optimizer = params.pop_choice("type", Optimizer.list_available())

    # make the parameter groups if needed
    groups = params.pop("parameter_groups", None)
    if groups:
        # The input to the optimizer is a list of dicts.
        # Each dict contains a "parameter group" and group specific options,
        # e.g., {'params': [list of parameters], 'lr': 1e-3, ...}
        # Any config option not specified in the additional options (e.g.
        # for the default group) is inherited from the top level config.
        # see: http://pytorch.org/docs/0.3.0/optim.html?#per-parameter-options
        #
        # groups contains something like:
        # "parameter_groups": [
        #     [["regex1", "regex2"], {"lr": 1e-3}],
        #     [["regex3"], {"lr": 1e-4}]
        # ]
        # (note that the allennlp config files require double quotes ", and will
        # fail (sometimes silently) with single quotes ').

        # This is typed as Any since the dict values other than
        # the params key are passed to the Optimizer constructor and
        # can be any type it accepts.
        # In addition to any parameters that match group specific regex,
        # we also need a group for the remaining "default" group.
        # Those will be included in the last entry of parameter_groups.
        parameter_groups: Any = [{'params': []} for _ in range(len(groups) + 1)]
        # add the group specific kwargs
        for k, group in enumerate(groups):
            parameter_groups[k].update(group[1].as_dict())

        # Seed every configured regex with a zero count up front, so unused
        # regexes are reported even when ``model_parameters`` is empty.
        regex_use_counts: Dict[str, int] = {
            regex: 0 for group in groups for regex in group[0]
        }
        parameter_group_names: List[set] = [set() for _ in range(len(groups) + 1)]
        for name, param in model_parameters:
            # Determine the group for this parameter.
            group_index = None
            for k, group in enumerate(groups):
                for regex in group[0]:
                    if re.search(regex, name):
                        if group_index is not None and group_index != k:
                            raise ValueError(
                                "{} was specified in two separate parameter groups".format(name))
                        group_index = k
                        regex_use_counts[regex] += 1

            # Parameters matching no regex fall into the trailing default group.
            target_index = group_index if group_index is not None else -1
            parameter_groups[target_index]['params'].append(param)
            parameter_group_names[target_index].add(name)

        # log the parameter groups
        logger.info("Done constructing parameter groups.")
        for k, (group, names) in enumerate(zip(parameter_groups, parameter_group_names)):
            group_options = {key: val for key, val in group.items() if key != 'params'}
            logger.info("Group %s: %s, %s", k, list(names), group_options)

        # check for unused regex
        for regex, count in regex_use_counts.items():
            if count == 0:
                logger.warning("When constructing parameter groups, "
                               "%s does not match any parameter name", regex)
    else:
        parameter_groups = [param for _, param in model_parameters]

    # Log the number of parameters to optimize
    num_parameters = 0
    for parameter_group in parameter_groups:
        if isinstance(parameter_group, dict):
            num_parameters += sum(parameter.numel() for parameter in parameter_group["params"])
        else:
            num_parameters += parameter_group.numel()
    logger.info("Number of trainable parameters: %s", num_parameters)
    return Optimizer.by_name(optimizer)(parameter_groups, **params.as_dict())  # type: ignore
class ArchivalTest(AllenNlpTestCase):
    """Tests that a trained model can be archived and loaded back unchanged."""

    def setUp(self):
        super().setUp()

        # The same fixture file serves as both training and validation data.
        tagging_data = str(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv')
        model_config = {
            "type": "simple_tagger",
            "text_field_embedder": {
                "tokens": {"type": "embedding", "embedding_dim": 5},
            },
            "encoder": {
                "type": "lstm",
                "input_size": 5,
                "hidden_size": 7,
                "num_layers": 2,
            },
        }
        self.params = Params({
            "model": model_config,
            "dataset_reader": {"type": "sequence_tagging"},
            "train_data_path": tagging_data,
            "validation_data_path": tagging_data,
            "iterator": {"type": "basic", "batch_size": 2},
            "trainer": {"num_epochs": 2, "optimizer": "adam"},
        })

    def test_archiving(self):
        # Training consumes the params, so snapshot them beforehand.
        expected_config = copy.deepcopy(self.params.as_dict())

        # `train_model` is expected to leave an archive in the output directory.
        output_dir = self.TEST_DIR / 'archive_test'
        trained = train_model(self.params, serialization_dir=output_dir)

        # Reload the model from that archive.
        restored_archive = load_archive(output_dir / "model.tar.gz")
        restored = restored_archive.model

        # Weights: same set of keys, and equal tensors under every key.
        trained_state = trained.state_dict()
        restored_state = restored.state_dict()
        assert set(trained_state) == set(restored_state)
        for weight_name in trained_state:
            assert torch.equal(trained_state[weight_name], restored_state[weight_name])

        # Vocabularies: identical mappings in both directions.
        trained_vocab, restored_vocab = trained.vocab, restored.vocab
        # pylint: disable=protected-access
        assert trained_vocab._token_to_index == restored_vocab._token_to_index
        assert trained_vocab._index_to_token == restored_vocab._index_to_token
        # pylint: enable=protected-access

        # Config: the archived params equal the pre-training snapshot.
        assert restored_archive.config.as_dict() == expected_config

    def test_extra_files(self):
        output_dir = self.TEST_DIR / 'serialization'
        tagging_data = str(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv')

        # Train first, then archive the model together with the training data.
        train_model(self.params, serialization_dir=output_dir)
        archive_model(serialization_dir=output_dir,
                      files_to_archive={"train_data_path": tagging_data})

        loaded_config = load_archive(output_dir / 'model.tar.gz').config

        # The archived file's param should now point at a temporary path; the
        # full path is unknown, but its suffix is not.
        assert loaded_config.get('train_data_path').endswith('/fta/train_data_path')

        # The validation path was not archived, so it must be untouched.
        assert loaded_config.get('validation_data_path') == tagging_data