def test_instance_implements_mutable_mapping(self):
    """Check that an ``Instance`` supports lookup, ``len()`` and ``items()``."""
    text = TextField([Token("hello")], {})
    label = LabelField(1, skip_indexing=True)
    instance = Instance({"words": text, "labels": label})

    # Lookup by field name hands back the field that was stored.
    assert instance["words"] == text
    assert instance["labels"] == label

    # The length is the number of fields.
    assert len(instance) == 2

    # Iterating ``items()`` yields every field name ...
    seen_names = set()
    for name, _ in instance.items():
        seen_names.add(name)
    assert seen_names == {"words", "labels"}

    # ... paired with every stored field.
    seen_fields = []
    for _, field in instance.items():
        seen_fields.append(field)
    assert text in seen_fields
    assert label in seen_fields
def _get_instance_tokenization(self, instance: Instance) -> Dict[str, Any]:
    """Collect the tokens of every field in ``instance``.

    Parameters
    ----------
    instance
        The instance whose fields are inspected.

    Returns
    -------
    tokenization
        A dict keyed by field name. A ``TextField`` maps to a list with one
        ``{"token": <text>, "attribution": 0.0}`` entry per token; a
        ``ListField`` maps to a list holding the result for each of its
        inner fields.

    Raises
    ------
    WrongValueError
        If a field, or a field nested inside a ``ListField``, is neither a
        ``TextField`` nor a ``ListField``.
    """

    def tokens_of(
        field: Field,
    ) -> Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]:
        """Render the tokens of a single (possibly nested) field."""
        if isinstance(field, TextField):
            rendered = []
            for token in field.tokens:
                # The attribution is only a placeholder at this point.
                rendered.append({"token": token.text, "attribution": 0.0})
            return rendered

        if isinstance(field, ListField):
            # Recurse so the nesting of the ListField is kept in the output.
            return [tokens_of(child) for child in field]

        raise WrongValueError(f"Cannot extract fields from [{type(field)}]")

    tokenization: Dict[str, Any] = {}
    for field_name, field in instance.items():
        tokenization[field_name] = tokens_of(field)
    return tokenization
def _extract_tokens(self, instance: Instance) -> List[Union[Token, List[Token]]]:
    """Gather the tokens of all text fields found in an instance.

    This is a generic implementation; a specific head may need to
    override it.

    Parameters
    ----------
    instance
        The instance underlying the prediction

    Returns
    -------
    tokens
        For a top-level ``TextField`` the extracted tokens are added
        individually. For each ``TextField`` inside a ``ListField`` the
        extracted tokens are added as one nested entry. Fields of any other
        type are ignored.
    """
    collected: List[Union[Token, List[Token]]] = []

    for name, field in instance.items():
        if isinstance(field, TextField):
            # Flatten: the tokens of a top-level text field join the result directly.
            collected.extend(self._extract_tokens_from_text_field(field, name))
            continue

        if not isinstance(field, ListField):
            continue

        # Keep one nested entry per text field contained in the list field.
        collected.extend(
            self._extract_tokens_from_text_field(item, name)
            for item in field
            if isinstance(item, TextField)
        )

    return collected