def from_params(cls, params: Params) -> 'ELMoTokenCharactersIndexer': """ Parameters ---------- namespace : ``str``, optional (default=``elmo_characters``) """ namespace = params.pop('namespace', 'elmo_characters') params.assert_empty(cls.__name__) return cls(namespace=namespace)
def from_params(cls, params: Params, instances: Iterable['adi.Instance'] = None): """ There are two possible ways to build a vocabulary; from a collection of instances, using :func:`Vocabulary.from_instances`, or from a pre-saved vocabulary, using :func:`Vocabulary.from_files`. This method wraps both of these options, allowing their specification from a ``Params`` object, generated from a JSON configuration file. Parameters ---------- params: Params, required. dataset: Dataset, optional. If ``params`` doesn't contain a ``vocabulary_directory`` key, the ``Vocabulary`` can be built directly from a ``Dataset``. Returns ------- A ``Vocabulary``. """ vocabulary_directory = params.pop("directory_path", None) if not vocabulary_directory and not instances: raise ConfigurationError( "You must provide either a Params object containing a " "vocab_directory key or a Dataset to build a vocabulary from.") if vocabulary_directory and instances: logger.info("Loading Vocab from files instead of dataset.") if vocabulary_directory: params.assert_empty("Vocabulary - from files") return Vocabulary.from_files(vocabulary_directory) min_count = params.pop("min_count", None) max_vocab_size = params.pop_int("max_vocab_size", None) non_padded_namespaces = params.pop("non_padded_namespaces", DEFAULT_NON_PADDED_NAMESPACES) pretrained_files = params.pop("pretrained_files", {}) only_include_pretrained_words = params.pop_bool( "only_include_pretrained_words", False) params.assert_empty("Vocabulary - from dataset") return Vocabulary.from_instances( instances=instances, min_count=min_count, max_vocab_size=max_vocab_size, non_padded_namespaces=non_padded_namespaces, pretrained_files=pretrained_files, only_include_pretrained_words=only_include_pretrained_words)
def from_params(cls, params: Params) -> 'TokenCharactersIndexer': """ Parameters ---------- namespace : ``str``, optional (default=``token_characters``) We will use this namespace in the :class:`Vocabulary` to map the characters in each token to indices. character_tokenizer : ``Params``, optional (default=``Params({})``) We use a :class:`CharacterTokenizer` to handle splitting tokens into characters, as it has options for byte encoding and other things. These parameters get passed to the character tokenizer. The default is to use unicode characters and to retain casing. """ namespace = params.pop('namespace', 'token_characters') character_tokenizer_params = params.pop('character_tokenizer', {}) character_tokenizer = CharacterTokenizer.from_params( character_tokenizer_params) params.assert_empty(cls.__name__) return cls(namespace=namespace, character_tokenizer=character_tokenizer)
def create_kwargs(cls: Type[T], params: Params, **extras) -> Dict[str, Any]: """ Given some class, a `Params` object, and potentially other keyword arguments, create a dict of keyword args suitable for passing to the class's constructor. The function does this by finding the class's constructor, matching the constructor arguments to entries in the `params` object, and instantiating values for the parameters using the type annotation and possibly a from_params method. Any values that are provided in the `extras` will just be used as is. For instance, you might provide an existing `Vocabulary` this way. """ # Get the signature of the constructor. signature = inspect.signature(cls.__init__) kwargs: Dict[str, Any] = {} # Iterate over all the constructor parameters and their annotations. for name, param in signature.parameters.items(): # Skip "self". You're not *required* to call the first parameter "self", # so in theory this logic is fragile, but if you don't call the self parameter # "self" you kind of deserve what happens. if name == "self": continue # If the annotation is a compound type like typing.Dict[str, int], # it will have an __origin__ field indicating `typing.Dict` # and an __args__ field indicating `(str, int)`. We capture both. annotation = remove_optional(param.annotation) origin = getattr(annotation, '__origin__', None) args = getattr(annotation, '__args__', []) # The parameter is optional if its default value is not the "no default" sentinel. default = param.default optional = default != _NO_DEFAULT # Some constructors expect extra non-parameter items, e.g. vocab: Vocabulary. # We check the provided `extras` for these and just use them if they exist. if name in extras: kwargs[name] = extras[name] # The next case is when the parameter type is itself constructible from_params. elif hasattr(annotation, 'from_params'): if name in params: # Our params have an entry for this, so we use that. subparams = params.pop(name) if takes_arg(annotation.from_params, 'extras'): # If annotation.params accepts **extras, we need to pass them all along. # For example, `BasicTextFieldEmbedder.from_params` requires a Vocabulary # object, but `TextFieldEmbedder.from_params` does not. subextras = extras else: # Otherwise, only supply the ones that are actual args; any additional ones # will cause a TypeError. subextras = { k: v for k, v in extras.items() if takes_arg(annotation.from_params, k) } # In some cases we allow a string instead of a param dict, so # we need to handle that case separately. if isinstance(subparams, str): kwargs[name] = annotation.by_name(subparams)() else: kwargs[name] = annotation.from_params(params=subparams, **subextras) elif not optional: # Not optional and not supplied, that's an error! raise ConfigurationError( f"expected key {name} for {cls.__name__}") else: kwargs[name] = default # If the parameter type is a Python primitive, just pop it off # using the correct casting pop_xyz operation. elif annotation == str: kwargs[name] = (params.pop(name, default) if optional else params.pop(name)) elif annotation == int: kwargs[name] = (params.pop_int(name, default) if optional else params.pop_int(name)) elif annotation == bool: kwargs[name] = (params.pop_bool(name, default) if optional else params.pop_bool(name)) elif annotation == float: kwargs[name] = (params.pop_float(name, default) if optional else params.pop_float(name)) # This is special logic for handling types like Dict[str, TokenIndexer], which it creates by # instantiating each value from_params and returning the resulting dict. elif origin in (Dict, dict) and len(args) == 2 and hasattr( args[-1], 'from_params'): value_cls = annotation.__args__[-1] value_dict = {} for key, value_params in params.pop(name, Params({})).items(): value_dict[key] = value_cls.from_params(params=value_params, **extras) kwargs[name] = value_dict else: # Pass it on as is and hope for the best. ¯\_(ツ)_/¯ if optional: kwargs[name] = params.pop(name, default) else: kwargs[name] = params.pop(name) params.assert_empty(cls.__name__) return kwargs