示例#1
0
    def test_added_pattern_recognizer_works(self):
        """A custom recognizer added to the store is used by later analyses.

        The same text is analyzed twice with the same engine: once before
        the "ROCKET" pattern recognizer is registered in the store mock
        (no results expected) and once after (exactly one result expected).
        """
        rocket_pattern = Pattern("rocket pattern", r'\W*(rocket)\W*', 0.8)
        rocket_recognizer = PatternRecognizer(
            "ROCKET",
            name="Rocket recognizer",
            patterns=[rocket_pattern])

        # The engine is built on a store mock that does not yet contain
        # the custom recognizer.
        store_mock = RecognizerStoreApiMock()
        engine = AnalyzerEngine(
            registry=MockRecognizerRegistry(store_mock),
            nlp_engine=MockNlpEngine())

        # Both analyze calls share exactly the same keyword arguments.
        analyze_kwargs = {
            "text": "rocket is my favorite transportation",
            "entities": ["CREDIT_CARD", "ROCKET"],
            "language": 'en',
            "all_fields": False,
        }

        # Before registration: nothing should be reported.
        before = engine.analyze(self.unit_test_guid, **analyze_kwargs)
        assert len(before) == 0

        # Register the custom recognizer through the store mock.
        store_mock.add_custom_pattern_recognizer(rocket_recognizer)

        # After registration: a single ROCKET result spanning [0, 7).
        after = engine.analyze(self.unit_test_guid, **analyze_kwargs)
        assert len(after) == 1
        assert_result(after[0], "ROCKET", 0, 7, 0.8)
示例#2
0
 def __init__(self, *args, **kwargs):
     """Create the registry and analyzer engine shared by the tests.

     All positional and keyword arguments are forwarded unchanged to the
     parent class initializer.
     """
     super(TestAnalyzerEngine, self).__init__(*args, **kwargs)

     # Registry backed by a store mock; kept on the instance for reuse.
     registry = MockRecognizerRegistry(RecognizerStoreApiMock())
     self.loaded_registry = registry

     # Mock NLP engine configured with empty word lists and a fixed
     # NlpArtifacts instance.
     empty_artifacts = NlpArtifacts([], [], [], [], None, "en")
     nlp_engine = MockNlpEngine(stopwords=[],
                                punct_words=[],
                                nlp_artifacts=empty_artifacts)

     self.loaded_analyzer_engine = AnalyzerEngine(registry, nlp_engine)
示例#3
0
 def test_when_allFields_is_true_and_entities_not_empty_exception(self):
     """Apply must raise ValueError when allFields is set together with fields.

     The request enables ``allFields`` on its template and also adds an
     explicit CREDIT_CARD field; calling ``Apply`` with it is expected to
     raise ``ValueError``.
     """
     engine = AnalyzerEngine(registry=RecognizerRegistry(),
                             nlp_engine=MockNlpEngine())

     request = AnalyzeRequest()
     request.text = ("My name is David and I live in Seattle."
                     "Domain: microsoft.com ")

     # Turn on allFields and, in the same template, list a specific field.
     template = request.analyzeTemplate
     template.allFields = True
     credit_card_field = template.fields.add()
     credit_card_field.name = 'CREDIT_CARD'
     credit_card_field.minScore = '0.5'

     with pytest.raises(ValueError):
         engine.Apply(request, None)
示例#4
0
 def __init__(self, *args, **kwargs):
     """Create the registry, tracer mock and analyzer engine used by the tests.

     All positional and keyword arguments are forwarded unchanged to the
     parent class initializer.
     """
     super(TestAnalyzerEngine, self).__init__(*args, **kwargs)

     # Registry backed by a store mock; kept on the instance for reuse.
     registry = MockRecognizerRegistry(RecognizerStoreApiMock())
     self.loaded_registry = registry

     empty_artifacts = NlpArtifacts([], [], [], [], None, "en")

     # Tracer mock created with interpretability enabled.
     tracer = AppTracerMock(enable_interpretability=True)
     self.app_tracer = tracer

     nlp_engine = MockNlpEngine(stopwords=[],
                                punct_words=[],
                                nlp_artifacts=empty_artifacts)
     self.loaded_analyzer_engine = AnalyzerEngine(registry,
                                                  nlp_engine,
                                                  app_tracer=tracer,
                                                  enable_trace_pii=True)

     # Fixed correlation id passed as the first argument of analyze().
     self.unit_test_guid = "00000000-0000-0000-0000-000000000000"
示例#5
0
    def test_when_default_threshold_is_zero_all_results_pass(self):
        """Both the credit card and the phone number are expected in the results.

        A dedicated engine is built for this test instead of reusing
        ``self.loaded_analyzer_engine``; it shares ``self.loaded_registry``
        but gets a fresh ``MockNlpEngine``.
        """
        # NOTE(review): the engine below uses MockNlpEngine with default
        # arguments, not a real NLP model -- confirm this is what the
        # PHONE_NUMBER detection in this test is meant to rely on.
        analyzer_engine = AnalyzerEngine(
            registry=self.loaded_registry, nlp_engine=MockNlpEngine())

        sample_text = (
            " Credit card: 4095-2609-9393-4932,  my phone is 425 8829090")
        requested_entities = ["CREDIT_CARD", "PHONE_NUMBER"]
        lang_code = "en"

        results = analyzer_engine.analyze(
            self.unit_test_guid,
            sample_text,
            requested_entities,
            lang_code,
            all_fields=False)

        # One result per requested entity.
        assert len(results) == 2
示例#6
0
    def test_when_allFields_is_true_return_all_fields(self):
        """With allFields enabled, Apply reports every entity type in the text.

        The request text contains a credit card number, a phone number and
        a domain name; all three entity names must appear in the response.
        """
        engine = AnalyzerEngine(registry=MockRecognizerRegistry(),
                                nlp_engine=MockNlpEngine())

        request = AnalyzeRequest()
        template = request.analyzeTemplate
        template.allFields = True
        template.resultsScoreThreshold = 0
        request.text = (
            " Credit card: 4095-2609-9393-4932,  my phone is 425 8829090 "
            "Domain: microsoft.com")

        response = engine.Apply(request, None)

        # Collect the entity name of every returned result.
        returned_entities = []
        for result in response.analyzeResults:
            returned_entities.append(result.field.name)

        assert response.analyzeResults is not None
        for expected_entity in ("CREDIT_CARD", "PHONE_NUMBER", "DOMAIN_NAME"):
            assert expected_entity in returned_entities
示例#7
0
    def test_get_recognizers_returns_supported_language(self):
        """GetAllRecognizers for "ru" returns exactly one recognizer.

        A single custom pattern recognizer declared with
        ``supported_language="ru"`` is added to the store mock before the
        engine is created.
        """
        rocket_pattern = Pattern("rocket pattern", r'\W*(rocket)\W*', 0.8)
        russian_recognizer = PatternRecognizer(
            "ROCKET",
            name="Rocket recognizer RU",
            patterns=[rocket_pattern],
            supported_language="ru")

        store_mock = RecognizerStoreApiMock()
        store_mock.add_custom_pattern_recognizer(russian_recognizer)

        engine = AnalyzerEngine(
            registry=MockRecognizerRegistry(store_mock),
            nlp_engine=MockNlpEngine())

        russian_request = RecognizersAllRequest(language="ru")
        response = engine.GetAllRecognizers(russian_request, None)

        # Exactly one recognizer is expected for "ru".
        assert len(response) == 1
示例#8
0
    def test_removed_pattern_recognizer_doesnt_work(self):
        """A custom recognizer stops producing results once it is removed.

        The same text is analyzed three times with the same engine:
        before the "SPACESHIP" recognizer is added to the store mock
        (no results), after it is added (one result), and after it is
        removed again by name (no results).
        """
        spaceship_pattern = Pattern(
            "spaceship pattern", r'\W*(spaceship)\W*', 0.8)
        spaceship_recognizer = PatternRecognizer(
            "SPACESHIP",
            name="Spaceship recognizer",
            patterns=[spaceship_pattern])

        # The engine is built on a store mock that does not yet contain
        # the custom recognizer.
        store_mock = RecognizerStoreApiMock()
        engine = AnalyzerEngine(
            registry=MockRecognizerRegistry(store_mock),
            nlp_engine=MockNlpEngine())
        sample_text = "spaceship is my favorite transportation"
        requested_entities = ["CREDIT_CARD", "SPACESHIP"]

        def run_analysis():
            # Every phase of the test issues this identical request.
            return engine.analyze(self.unit_test_guid,
                                  text=sample_text,
                                  entities=requested_entities,
                                  language='en',
                                  all_fields=False)

        # Phase 1: recognizer not registered yet.
        assert len(run_analysis()) == 0

        # Phase 2: register the recognizer for the word "spaceship" and
        # check that it is now detected.
        store_mock.add_custom_pattern_recognizer(spaceship_recognizer)
        detected = run_analysis()
        assert len(detected) == 1
        assert_result(detected[0], "SPACESHIP", 0, 10, 0.8)

        # Phase 3: remove the recognizer by name; results must be empty again.
        store_mock.remove_recognizer("Spaceship recognizer")
        assert len(run_analysis()) == 0
示例#9
0
    def test_get_recognizers_returns_added_custom(self):
        """GetAllRecognizers includes a custom recognizer after it is added.

        For language "en" the test expects 15 recognizers before the custom
        "ROCKET" recognizer is added to the store mock and 16 afterwards.
        """
        rocket_pattern = Pattern("rocket pattern", r'\W*(rocket)\W*', 0.8)
        rocket_recognizer = PatternRecognizer(
            "ROCKET",
            name="Rocket recognizer",
            patterns=[rocket_pattern])

        store_mock = RecognizerStoreApiMock()
        engine = AnalyzerEngine(
            registry=MockRecognizerRegistry(store_mock),
            nlp_engine=MockNlpEngine())
        english_request = RecognizersAllRequest(language="en")

        # Number of recognizers expected before any custom one is added.
        predefined_count = 15

        baseline = engine.GetAllRecognizers(english_request, None)
        assert len(baseline) == predefined_count

        store_mock.add_custom_pattern_recognizer(rocket_recognizer)

        # The predefined recognizers plus the one custom recognizer.
        with_custom = engine.GetAllRecognizers(english_request, None)
        assert len(with_custom) == predefined_count + 1