def test_check_correct_entity_annotations(
    text: Text, warnings: int, whitespace_tokenizer: WhitespaceTokenizer
):
    """Check that the entity-annotation check emits the expected warnings.

    Args:
        text: Training data, parsed with ``RasaYAMLReader.reads``.
        warnings: Number of warnings expected to be recorded by the check.
        whitespace_tokenizer: Tokenizer fixture that processes the training
            data before the annotation check runs.
    """
    reader = RasaYAMLReader()
    training_data = reader.reads(text)
    whitespace_tokenizer.process_training_data(training_data)

    with pytest.warns(UserWarning) as record:
        EntityExtractorMixin.check_correct_entity_annotations(training_data)

    assert len(record) == warnings

    # The membership test itself must be what `all()` consumes. Wrapping it in
    # a list (`[excerpt in ...]`) yields a non-empty list per item, which is
    # always truthy, so the message text was never actually verified.
    # NOTE(review): only the leading phrase is asserted because the longer
    # wording (" in sentence") was never exercised by the old, vacuous check;
    # confirm the exact warning text before tightening this excerpt.
    first_message = record[0].message.args[0]
    assert all(
        excerpt in first_message for excerpt in ["Misaligned entity annotation"]
    )
def test_convert_tags_to_entities(
    text: Text,
    tags: Dict[Text, List[Text]],
    confidences: Dict[Text, List[float]],
    expected_entities: List[Dict[Text, Any]],
    whitespace_tokenizer: WhitespaceTokenizer,
):
    """Verify that predicted tags are converted into the expected entities.

    Args:
        text: Raw message text that gets tokenized.
        tags: Predicted tags, keyed by tag name.
        confidences: Confidence values, keyed the same way as ``tags``.
        expected_entities: Entities the conversion is expected to return.
        whitespace_tokenizer: Tokenizer fixture used to tokenize ``text``.
    """
    extractor = EntityExtractorMixin()

    # Tokenize the TEXT attribute of a message built from the raw text.
    tokens = whitespace_tokenizer.tokenize(Message(data={TEXT: text}), TEXT)

    converted = extractor.convert_predictions_into_entities(
        text,
        tokens,
        tags,
        {SPLIT_ENTITIES_BY_COMMA: True},
        confidences,
    )

    assert converted == expected_entities
def test_split_entities_by_comma(
    text: Text,
    tags: Dict[Text, List[Text]],
    confidences: Dict[Text, List[float]],
    expected_entities: List[Dict[Text, Any]],
):
    """Verify entity conversion with a mixed split-by-comma configuration.

    The configuration sets ``SPLIT_ENTITIES_BY_COMMA`` to ``True`` and adds
    explicit entries for ``"address"`` (``False``) and ``"ingredient"``
    (``True``) — presumably per-entity-type overrides; confirm against the
    extractor implementation.

    Args:
        text: Raw message text that gets tokenized.
        tags: Predicted tags, keyed by tag name.
        confidences: Confidence values, keyed the same way as ``tags``.
        expected_entities: Entities the conversion is expected to return.
    """
    # Use the same class as the sibling tests exercising this API, so the
    # module depends on a single extractor name.
    extractor = EntityExtractorMixin()

    # NOTE(review): sibling tests take the tokenizer from the
    # `whitespace_tokenizer` fixture, while this test builds its own instance.
    # Consider switching to the fixture if the tokenizer needs configuration.
    tokenizer = WhitespaceTokenizer()
    message = Message(data={TEXT: text})
    tokens = tokenizer.tokenize(message, TEXT)

    split_entities_config = {
        SPLIT_ENTITIES_BY_COMMA: True,
        "address": False,
        "ingredient": True,
    }
    actual_entities = extractor.convert_predictions_into_entities(
        text, tokens, tags, split_entities_config, confidences
    )

    assert actual_entities == expected_entities