def _update_recognizers_based_on_entities_to_keep(
    self, analyzer_engine: AnalyzerEngine
):
    """Reconcile ``self.entities`` with what the analyzer engine supports.

    Prints the entities the engine supports for ``self.language``. When
    ``self.entities`` is empty it defaults to a copy of that list. If
    ORGANIZATION was requested but is not supported (the original note says
    it is removed by default), it is enabled on the first spaCy/Stanza
    recognizer registered for ``self.language``. Finally, a warning is
    printed for every requested entity that is still unsupported.

    :param analyzer_engine: Engine whose supported entities and recognizers
        are inspected. The chosen recognizer's ``supported_entities`` list
        may be extended in place.
    """
    supported_entities = analyzer_engine.get_supported_entities(
        language=self.language
    )
    print("Entities supported by this Presidio Analyzer instance:")
    print(", ".join(supported_entities))

    if not self.entities:
        # Copy rather than alias, so later changes to self.entities can
        # never leak into the list handed back by the engine.
        self.entities = list(supported_entities)

    supported = set(supported_entities)

    if "ORGANIZATION" in self.entities and "ORGANIZATION" not in supported:
        # Look only at recognizers for the same language the supported
        # entities were computed for.
        nlp_recognizer = next(
            (
                rec
                for rec in analyzer_engine.get_recognizers(
                    language=self.language
                )
                if rec.name in ("SpacyRecognizer", "StanzaRecognizer")
            ),
            None,
        )
        if nlp_recognizer is not None:
            if "ORGANIZATION" not in nlp_recognizer.supported_entities:
                nlp_recognizer.supported_entities.append("ORGANIZATION")
            # ORGANIZATION is already in self.entities (checked above), so
            # it is not appended again; only the supported set changes.
            supported.add("ORGANIZATION")
            print("Added ORGANIZATION as a supported entity from spaCy/Stanza")

    # Warn after the ORGANIZATION step so an entity that was just enabled is
    # not reported as unsupported.
    for entity in self.entities:
        if entity not in supported:
            print(
                f"Entity {entity} is not supported by this instance of Presidio Analyzer Engine"
            )
def test_when_get_recognizers_then_returns_supported_language():
    """Requesting recognizers for "ru" yields the single Russian recognizer."""
    # Register one Russian-only pattern recognizer in the mocked registry.
    rocket_recognizer = PatternRecognizer(
        "ROCKET",
        name="Rocket recognizer RU",
        patterns=[Pattern("rocket pattern", r"\W*(rocket)\W*", 0.8)],
        supported_language="ru",
    )
    registry = RecognizerRegistryMock()
    registry.add_recognizer(rocket_recognizer)

    engine = AnalyzerEngine(registry=registry, nlp_engine=NlpEngineMock())

    russian_recognizers = engine.get_recognizers(language="ru")

    # Expect exactly one recognizer for "ru" (the mocked Russian one).
    assert len(russian_recognizers) == 1