def _update_recognizers_based_on_entities_to_keep(
            self, analyzer_engine: AnalyzerEngine):
        """Reconcile ``self.entities`` with what the analyzer engine supports.

        Prints the entities the engine reports for ``self.language``. When
        ``self.entities`` is empty, it defaults to a copy of that list.
        A notice is printed for every requested entity the engine does not
        report.

        If ORGANIZATION is requested but not reported by the engine (the
        original note says it is removed by default), the first recognizer
        named ``SpacyRecognizer`` or ``StanzaRecognizer`` is updated in place
        so that its ``supported_entities`` includes ORGANIZATION.

        Args:
            analyzer_engine: Engine queried through
                ``get_supported_entities(language=...)`` and
                ``get_recognizers()``.

        Side effects:
            May rebind ``self.entities``, may append to a recognizer's
            ``supported_entities`` list, and writes to stdout.
        """
        supported_entities = analyzer_engine.get_supported_entities(
            language=self.language)
        print("Entities supported by this Presidio Analyzer instance:")
        print(", ".join(supported_entities))

        if not self.entities:
            # Copy instead of aliasing, so later edits to self.entities never
            # leak into the list object handed back by the engine.
            self.entities = list(supported_entities)

        for entity in self.entities:
            if entity not in supported_entities:
                print(
                    f"Entity {entity} is not supported by this instance of Presidio Analyzer Engine"
                )

        # Nothing more to do unless ORGANIZATION was requested and is missing.
        if "ORGANIZATION" not in self.entities or "ORGANIZATION" in supported_entities:
            return

        nlp_recognizer = next(
            (rec for rec in analyzer_engine.get_recognizers()
             if rec.name in ("SpacyRecognizer", "StanzaRecognizer")),
            None,
        )
        if nlp_recognizer is None:
            return

        # ORGANIZATION is already in self.entities at this point (checked
        # above), so only the recognizer needs updating; appending to
        # self.entities again would list the entity twice.
        if "ORGANIZATION" not in nlp_recognizer.supported_entities:
            nlp_recognizer.supported_entities.append("ORGANIZATION")
        print(
            "Added ORGANIZATION as a supported entity from spaCy/Stanza"
        )
Пример #2
0
def test_when_get_supported_fields_then_return_all_languages(
        mock_registry, unit_test_guid, nlp_engine):
    """Query supported entities without a language filter.

    With the ``mock_registry`` fixture, the engine is expected to report
    exactly three entities: CREDIT_CARD, DOMAIN_NAME and PHONE_NUMBER.
    """
    engine = AnalyzerEngine(registry=mock_registry, nlp_engine=nlp_engine)
    supported = engine.get_supported_entities()

    assert len(supported) == 3
    for expected_entity in ("CREDIT_CARD", "DOMAIN_NAME", "PHONE_NUMBER"):
        assert expected_entity in supported
Пример #3
0
def test_when_get_supported_fields_specific_language_then_return_single_result(
        loaded_registry, unit_test_guid, nlp_engine):
    """Query supported entities for a single language.

    A ROCKET pattern recognizer registered for "ru" is added to the engine's
    registry; asking for the "ru" entities is then expected to yield ROCKET
    and nothing else.
    """
    rocket_pattern = Pattern("rocket pattern", r"\W*(rocket)\W*", 0.8)
    russian_rocket_recognizer = PatternRecognizer(
        "ROCKET",
        name="Rocket recognizer RU",
        patterns=[rocket_pattern],
        supported_language="ru",
    )

    engine = AnalyzerEngine(registry=loaded_registry, nlp_engine=nlp_engine)
    engine.registry.add_recognizer(russian_rocket_recognizer)

    russian_entities = engine.get_supported_entities(language="ru")

    assert len(russian_entities) == 1
    assert "ROCKET" in russian_entities