def _process_entity(entity, sync=False):
    """Prepare an entity for indexing.

    Runs ``analyze_entity`` (the NLP pipeline step) on the entity, then
    calls ``refresh_entity`` with the entity's ID, forwarding ``sync``.

    Returns the same entity object that was passed in.
    """
    analyze_entity(entity)
    # The ID is read after analysis, exactly as the refresh call needs it.
    entity_id = entity.id
    refresh_entity(entity_id, sync=sync)
    return entity
def test_language_tagging(self):
    # Analyze a repeated French sentence and check both the extracted
    # name values and the detected language recorded on the entity.
    sentence = "C'est le caniche d'Emmanuel Macron. "
    proxy = model.make_entity('PlainText')
    proxy.add('bodyText', sentence * 2)
    analyze_entity(proxy)

    names = proxy.get_type_values(registry.name)
    assert "d'Emmanuel Macron" in names, names

    languages = proxy.get('detectedLanguage')
    assert languages == ['fra'], languages
def test_ner_extract(self):
    # Analyze a German sentence repeated five times and check that the
    # person name shows up among the entity's name-typed values.
    sentence = 'Das ist der Pudel von Angela Merkel. '
    body = sentence * 5
    proxy = model.make_entity('PlainText')
    proxy.add('bodyText', body)
    analyze_entity(proxy)

    found_names = proxy.get_type_values(registry.name)
    assert 'Angela Merkel' in found_names, found_names
def test_pattern_extract(self):
    # Analyze text containing a "tel:" phone number and check that the
    # number and the country code 'in' are both present on the entity.
    body = "Mr. Flubby Flubber called the number tel:+919988111222 twice"
    proxy = model.make_entity('PlainText')
    proxy.add('bodyText', body)
    analyze_entity(proxy)

    assert '+919988111222' in proxy.get_type_values(registry.phone)
    assert 'in' in proxy.get_type_values(registry.country)
def _process_entity(entity, sync=False):
    """Prepare an entity for indexing.

    Rejects entities without an ID, runs ``analyze_entity`` (the NLP
    pipeline step) on the entity and, when ``sync`` is truthy, calls
    ``refresh_entity_id`` with the entity's ID.

    Returns the same entity object that was passed in.

    Raises:
        InvalidData: if ``entity.id`` is ``None``; the entity's
            ``to_dict()`` output is attached as ``errors``.
    """
    if entity.id is None:
        payload = entity.to_dict()
        raise InvalidData("No ID for entity", errors=payload)

    analyze_entity(entity)
    if not sync:
        return entity

    refresh_entity_id(entity.id)
    return entity