def index(self, vocab: Vocabulary):
    """Resolve ``self.labels`` into integer ids and record the label-space size.

    Idempotent: ids are computed only on the first call, and the cached
    ``_num_labels`` is refreshed from the vocabulary only when it is unset.
    """
    if self._label_ids is None:
        namespace = self._label_namespace
        self._label_ids = [
            vocab.get_token_index(lbl, namespace)  # type: ignore
            for lbl in self.labels
        ]
    if not self._num_labels:
        # Fall back to the vocabulary's size for this namespace.
        self._num_labels = vocab.get_vocab_size(self._label_namespace)
def index(self, vocab: Vocabulary):
    """Look up the integer id of every label in the configured namespace.

    Does nothing when the field was constructed with pre-indexed labels
    (``_skip_indexing``).
    """
    if self._skip_indexing:
        return
    namespace = self._label_namespace
    self._indexed_labels = [
        vocab.get_token_index(tag, namespace)  # type: ignore
        for tag in self.labels
    ]
def indices_to_tokens(
    self, indexed_tokens: IndexedTokenList, vocabulary: Vocabulary
) -> List[Token]:
    """Map already-indexed token ids back into ``Token`` objects.

    Each returned token carries the surface text recovered from the
    vocabulary plus its original ``text_id`` and, when present in
    ``indexed_tokens``, its ``type_id``.
    """
    self._add_encoding_to_vocabulary_if_needed(vocabulary)
    ids = indexed_tokens["token_ids"]
    segment_ids = indexed_tokens.get("type_ids")  # may legitimately be absent
    tokens: List[Token] = []
    for position, token_id in enumerate(ids):
        tokens.append(
            Token(
                text=vocabulary.get_token_from_index(token_id, self._namespace),
                text_id=token_id,
                type_id=segment_ids[position] if segment_ids is not None else None,
            )
        )
    return tokens
def tokens_to_indices(self, tokens: List[Token], vocabulary: Vocabulary) -> Dict[str, List[int]]:
    """Convert each token (with optional start/end padding tokens) to an int id.

    With no namespace configured, the extracted feature value is used
    directly as the id; otherwise it is (optionally lowercased and) looked
    up in the vocabulary.
    """
    ids: List[int] = []
    for tok in itertools.chain(self._start_tokens, tokens, self._end_tokens):
        value = self._get_feature_value(tok)
        if self.namespace is None:
            # We could have a check here that `value` is an int; not sure it's worth it.
            ids.append(value)  # type: ignore
            continue
        if self.lowercase_tokens:
            value = value.lower()
        ids.append(vocabulary.get_token_index(value, self.namespace))
    return {"tokens": ids}
def tokens_to_indices(
    self, tokens: List[Token], vocabulary: Vocabulary
) -> Dict[str, List[List[int]]]:
    """Produce a list of character-id lists, one per token.

    Start/end padding tokens are indexed alongside the input tokens.

    Raises
    ------
    ConfigurationError
        If any token has no ``text`` (the upstream tokenizer discarded it).
    """
    output: List[List[int]] = []
    for tok in itertools.chain(self._start_tokens, tokens, self._end_tokens):
        if tok.text is None:
            raise ConfigurationError(
                "TokenCharactersIndexer needs a tokenizer that retains text"
            )
        char_ids: List[int] = []
        for ch in self._character_tokenizer.tokenize(tok.text):
            preset = getattr(ch, "text_id", None)
            if preset is not None:
                # A pre-assigned `text_id` bypasses the vocabulary entirely.
                char_ids.append(preset)
            else:
                char_ids.append(vocabulary.get_token_index(ch.text, self._namespace))
        output.append(char_ids)
    return {"token_characters": output}
def index(self, vocab: Vocabulary):
    """Translate ``self.labels`` into integer ids; no-op when labels are absent."""
    if self.labels is None:
        return
    self._indexed_labels = [
        vocab.get_token_index(each, self._label_namespace) for each in self.labels
    ]
def index(self, vocab: Vocabulary):
    """Resolve the single label to its integer id unless pre-indexed."""
    if self._skip_indexing:
        return
    self._label_id = vocab.get_token_index(
        self.label, self._label_namespace  # type: ignore
    )
def index(self, vocab: Vocabulary):
    """Build the source-token → target-namespace id mapping array."""
    ids = []
    for tok in self._source_tokens:
        ids.append(vocab.get_token_index(tok.ensure_text(), self._target_namespace))
    self._mapping_array = ids