def test_when_get_recognizers_then_returns_supported_language():
    """Check that ``get_recognizers(language="ru")`` yields exactly one recognizer.

    A pattern recognizer declared with ``supported_language="ru"`` is added to
    the mock registry before the engine is queried for Russian recognizers.
    """
    russian_rocket_recognizer = PatternRecognizer(
        "ROCKET",
        name="Rocket recognizer RU",
        patterns=[Pattern("rocket pattern", r"\W*(rocket)\W*", 0.8)],
        supported_language="ru",
    )

    registry = RecognizerRegistryMock()
    registry.add_recognizer(russian_rocket_recognizer)
    engine = AnalyzerEngine(registry=registry, nlp_engine=NlpEngineMock())

    russian_recognizers = engine.get_recognizers(language="ru")

    # The mocked registry is expected to expose a single Russian recognizer.
    assert len(russian_recognizers) == 1
def test_when_analyze_added_pattern_recognizer_then_succeed(unit_test_guid):
    """Check that "ROCKET" is only detected once its recognizer is registered.

    The same analysis request is issued twice: first against the untouched
    mock registry (no results expected), then after the rocket pattern
    recognizer has been added (one result expected).
    """
    rocket_recognizer = PatternRecognizer(
        "ROCKET",
        name="Rocket recognizer",
        patterns=[Pattern("rocket pattern", r"\W*(rocket)\W*", 0.8)],
    )

    registry = RecognizerRegistryMock()
    engine = AnalyzerEngine(registry=registry, nlp_engine=NlpEngineMock())

    # Both analysis calls share these exact arguments.
    request = {
        "correlation_id": unit_test_guid,
        "text": "rocket is my favorite transportation",
        "entities": ["CREDIT_CARD", "ROCKET"],
        "language": "en",
    }

    # Before registration, neither requested entity should be found.
    findings_before = engine.analyze(**request)
    assert len(findings_before) == 0

    # Register the rocket recognizer on the registry the engine already uses.
    registry.add_recognizer(rocket_recognizer)

    # Now the text should produce exactly one ROCKET finding.
    findings_after = engine.analyze(**request)
    assert len(findings_after) == 1
    assert_result(findings_after[0], "ROCKET", 0, 7, 0.8)
def test_when_removed_pattern_recognizer_then_doesnt_work(unit_test_guid):
    """Check that "SPACESHIP" stops being detected once its recognizer is removed.

    The same analysis request is issued three times: before the recognizer is
    registered (no results), after it is added (one result), and after it is
    removed by name (no results again).
    """
    spaceship_recognizer = PatternRecognizer(
        "SPACESHIP",
        name="Spaceship recognizer",
        patterns=[Pattern("spaceship pattern", r"\W*(spaceship)\W*", 0.8)],
    )

    registry = RecognizerRegistryMock()
    engine = AnalyzerEngine(registry=registry, nlp_engine=NlpEngineMock())

    # All three analysis calls share these exact arguments.
    request = {
        "correlation_id": unit_test_guid,
        "text": "spaceship is my favorite transportation",
        "entities": ["CREDIT_CARD", "SPACESHIP"],
        "language": "en",
    }

    # Before registration, neither requested entity should be found.
    findings = engine.analyze(**request)
    assert len(findings) == 0

    # Register the spaceship recognizer on the registry the engine already uses.
    registry.add_recognizer(spaceship_recognizer)

    # Now the text should produce exactly one SPACESHIP finding.
    findings = engine.analyze(**request)
    assert len(findings) == 1
    assert_result(findings[0], "SPACESHIP", 0, 10, 0.8)

    # Remove the recognizer by the name it was created with.
    registry.remove_recognizer("Spaceship recognizer")

    # With the recognizer gone, the same request should find nothing again.
    findings = engine.analyze(**request)
    assert len(findings) == 0
def mock_registry():
    """Create and return a new ``RecognizerRegistryMock`` instance.

    NOTE(review): presumably intended as a pytest fixture; no decorator is
    visible at this definition, so confirm how it is wired up.
    """
    registry = RecognizerRegistryMock()
    return registry