def _validate(self, finetune_enabled=False):
    """Ensure ``n_components`` is an integer no smaller than two."""
    components = self.n_components
    check_true(isinstance(components, int),
               TypeError("The number of components must be an integer."))
    check_true(components >= 2,
               ValueError("The number of components must be at least two."))
def _validate(self, finetune_enabled=False):
    """Reject pretrained weights: random embeddings cannot be pretrained."""
    is_unset = self.pretrained is None or self.pretrained is Constants.default_model
    check_true(is_unset,
               ValueError("Random embeddings cannot be pretrained. Save the TextWiser object instead."))
def _validate(self, finetune_enabled=False):
    """Verify UMAP is importable and ``n_components`` is an int >= 2."""
    import umap  # noqa: F401 -- raises ImportError when umap is not installed
    components = self.n_components
    check_true(isinstance(components, int),
               TypeError("The number of components must be an integer."))
    check_true(components >= 2,
               ValueError("The number of components must be at least two."))
def _validate(self, finetune_enabled=False):
    """The pretrained source must be absent, the default model, an existing path, or file-like."""
    model = self.pretrained
    acceptable = (
        not model
        or model is Constants.default_model
        or (isinstance(model, str) and os.path.exists(model))
        or hasattr(model, 'read')  # file-like object
    )
    check_true(acceptable,
               ValueError("The pretrained model should be a path to a pickle file or a file-like object."))
def _validate(self, finetune_enabled=False):
    """Validate the schema container type and forbid pretraining for compound embeddings."""
    check_true(isinstance(self.schema, (dict, str, tuple, list)),
               TypeError("The schema should either be a dictionary, a valid embedding, an embedding-parameters tuple, or the path to a JSON file."))
    no_pretrained = self.pretrained is None or self.pretrained is Constants.default_model
    check_true(no_pretrained,
               ValueError("Compound embeddings cannot be pretrained. Save the TextWiser object instead."))
def _validate_init_args(embedding, transformations, is_finetuneable, dtype):
    """Validate arguments for the constructor.

    Validates the embedding and each transformation, warns when word
    embeddings would go unpooled, rejects double pooling, and checks
    dtype / fine-tuning compatibility.

    :param embedding: the embedding specification to validate.
    :param transformations: optional iterable of transformations, each with a ``_validate`` method.
    :param is_finetuneable: whether the model weights will be fine-tuned.
    :param dtype: the requested output dtype (numpy or torch).
    :raises TypeError: if ``dtype`` is not a numpy/torch type, or not torch while fine-tuning.
    :raises ValueError: if pooling is specified twice, or fine-tuning is requested
        without any fine-tuneable weights.
    """
    # Embedding
    embedding._validate(finetune_enabled=is_finetuneable)

    # Transformation: plain loop, since _validate is called only for its
    # side effects (a comprehension would build a useless list of Nones).
    if transformations:
        for transformation in transformations:
            transformation._validate()

    # Shared predicate: is there a Pool transformation among the given ones?
    has_pool_transform = bool(transformations) and any(
        isinstance(transformation, Transformation.Pool) for transformation in transformations)

    # Words should be pooled
    if isinstance(embedding, Embedding.Word) and embedding.inline_pool_option is None \
            and not has_pool_transform:
        warnings.warn(
            "Word embeddings are specified but no pool options are specified. Are you sure you don't want to pool them?",
            RuntimeWarning)

    # Words shouldn't be double-pooled
    check_false(
        isinstance(embedding, Embedding.Word) and embedding.inline_pool_option is not None
        and has_pool_transform,
        ValueError("You cannot specify both `inline_pool_option` and `Pool` transformation for the same"
                   " embedding at the same time. Please pick one!"))

    # dtype
    check_true(isinstance(dtype, torch.dtype) or issubclass(dtype, np.generic),
               TypeError("The dtype must be either a numpy or torch type."))
    check_true(not is_finetuneable or isinstance(dtype, torch.dtype),
               TypeError("The dtype must be torch for model to be fine-tuneable."))
    check_true(not is_finetuneable or TextWiser._check_finetuneable(embedding, transformations),
               ValueError("Model must have fine-tuneable weights if `is_finetuneable` is specified."))
def _validate(self, finetune_enabled=False):
    """Validate the deterministic flag, the optional tokenizer, and the pretrained source."""
    check_true(isinstance(self.deterministic, bool),
               TypeError("The deterministic parameter should be a boolean."))
    if self.tokenizer:
        # Smoke-test the tokenizer on a sample document.
        tokens = self.tokenizer("string")
        check_true(isinstance(tokens, list),
                   TypeError("The tokenizer should return a list of tokens."))
        check_true(isinstance(tokens[0], str),
                   TypeError("The tokens should be of string type."))
    model = self.pretrained
    acceptable = (
        not model
        or model is Constants.default_model
        or (isinstance(model, str) and os.path.exists(model))
        or hasattr(model, 'read')  # file-like object
    )
    check_true(acceptable,
               ValueError("The pretrained model should be a path to a pickle file or a file-like object."))
def _validate(self, finetune_enabled=False):
    """The pool option must be a ``PoolOptions`` member."""
    pool = self.pool_option
    check_true(isinstance(pool, PoolOptions),
               TypeError("The pool type must be models.options.PoolOptions"))
def _validate(self, finetune_enabled=False):
    """Validate word-embedding configuration.

    Checks the word option, pretraining/fine-tuning compatibility,
    sparsity, layer selection, inline pooling, and the optional tokenizer.

    :param finetune_enabled: whether the model weights will be fine-tuned.
    :raises ValueError: for unsupported or incompatible option combinations.
    :raises TypeError: if the tokenizer output is malformed.
    """
    # Enum members are singletons, so identity comparison is the idiomatic check.
    check_true(isinstance(self.word_option, WordOptions),
               ValueError("The embedding must be one of the supported word embeddings."))
    check_true(self.pretrained or self.word_option is WordOptions.word2vec,
               ValueError("Only word2vec embeddings can be trained from scratch."))
    check_true(not finetune_enabled or self._is_finetuneable(),
               ValueError("The weights can only be fine-tuned if they are not ELMo embeddings."))
    check_false(not finetune_enabled and self.word_option is WordOptions.char,
                ValueError("Character embeddings are only available if the model is fine-tuneable."))
    check_true(not self.sparse or self.word_option is WordOptions.word2vec,
               ValueError("Sparse embeddings only supported with word2vec embeddings"))
    # Generator expression: no need to materialize a throwaway list for all().
    check_true(isinstance(self.layers, int)
               or all(isinstance(layer, int) for layer in self.layers),
               ValueError("Layers can only be an integer or a list of integers"))
    check_true(not self.inline_pool_option or isinstance(self.inline_pool_option, PoolOptions),
               ValueError("Inline pooling should either be None or a pool option."))
    if self.tokenizer:
        check_true(self.word_option is WordOptions.word2vec,
                   ValueError("The tokenizer can only be used if word2vec embeddings are used."))
        # Smoke-test the tokenizer on a sample document.
        doc = "string"
        res = self.tokenizer(doc)
        check_true(isinstance(res, list),
                   TypeError("The tokenizer should return a list of tokens."))
        check_true(isinstance(res[0], str),
                   TypeError("The tokens should be of string type."))
def _validate(self, finetune_enabled=False):
    """USE needs tensorflow + tensorflow_hub installed and a pretrained model."""
    # Each import raises ImportError when the dependency is missing.
    import tensorflow  # noqa: F401
    import tensorflow_hub  # noqa: F401
    check_true(self.pretrained, ValueError("USE needs to be pretrained."))