Exemplo n.º 1
0
 def __init__(self, stopwords=None, punct_words=None, nlp_artifacts=None):
     """Store the canned word lists and NLP artifacts on the instance.

     :param stopwords: List kept as ``self.stopwords``; a new empty list
         is created when omitted.
     :param punct_words: List kept as ``self.punct_words``; a new empty
         list is created when omitted.
     :param nlp_artifacts: Object kept as ``self.nlp_artifacts``; when
         omitted, an ``NlpArtifacts`` built from four empty lists,
         ``None`` and ``"en"`` is used instead.
     """
     # The defaults are None rather than [] on purpose: a list literal in
     # the signature is created once and would be shared (and mutated) by
     # every instance constructed without explicit arguments.
     self.stopwords = [] if stopwords is None else stopwords
     self.punct_words = [] if punct_words is None else punct_words
     if nlp_artifacts is None:
         nlp_artifacts = NlpArtifacts([], [], [], [], None, "en")
     self.nlp_artifacts = nlp_artifacts
Exemplo n.º 2
0
def loaded_analyzer_engine(loaded_registry, app_tracer):
    """Build an ``AnalyzerEngine`` backed by a mock NLP engine.

    The engine receives the given registry and tracer, a ``NlpEngineMock``
    configured with empty word lists and empty artifacts, and has
    ``log_decision_process`` switched on.
    """
    empty_artifacts = NlpArtifacts([], [], [], [], None, "en")
    nlp_engine = NlpEngineMock(
        stopwords=[],
        punct_words=[],
        nlp_artifacts=empty_artifacts,
    )
    return AnalyzerEngine(
        loaded_registry,
        nlp_engine,
        app_tracer=app_tracer,
        log_decision_process=True,
    )
Exemplo n.º 3
0
 def doc_to_nlp_artifact(self, doc, language):
     """Wrap a parsed document in an ``NlpArtifacts`` object.

     Collects the ``text``, ``lemma_`` and ``idx`` of every token in
     ``doc``, together with ``doc.ents``, and records this engine and the
     given language alongside them.
     """
     token_texts = [tok.text for tok in doc]
     token_lemmas = [tok.lemma_ for tok in doc]
     token_offsets = [tok.idx for tok in doc]
     return NlpArtifacts(
         entities=doc.ents,
         tokens=token_texts,
         tokens_indices=token_offsets,
         lemmas=token_lemmas,
         nlp_engine=self,
         language=language,
     )
Exemplo n.º 4
0
 def __init__(self, *args, **kwargs):
     """Prepare the shared fixtures used by the analyzer-engine tests.

     Creates a mock recognizer registry, an interpretability-enabled
     tracer mock, an ``AnalyzerEngine`` wired to a mock NLP engine with
     empty artifacts, and a fixed all-zero GUID.
     """
     super(TestAnalyzerEngine, self).__init__(*args, **kwargs)

     store_api = RecognizerStoreApiMock()
     self.loaded_registry = MockRecognizerRegistry(store_api)

     empty_artifacts = NlpArtifacts([], [], [], [], None, "en")
     self.app_tracer = AppTracerMock(enable_interpretability=True)

     nlp_engine = MockNlpEngine(
         stopwords=[],
         punct_words=[],
         nlp_artifacts=empty_artifacts,
     )
     self.loaded_analyzer_engine = AnalyzerEngine(
         self.loaded_registry,
         nlp_engine,
         app_tracer=self.app_tracer,
         enable_trace_pii=True,
     )
     self.unit_test_guid = "00000000-0000-0000-0000-000000000000"
Exemplo n.º 5
0
 def _doc_to_nlp_artifact(self, doc: Doc, language: str) -> NlpArtifacts:
     """Wrap a parsed document in an ``NlpArtifacts`` object.

     The document itself is passed as ``tokens``; per-token ``lemma_`` and
     ``idx`` values are collected into lists, and ``doc.ents`` is passed
     as the entities. This engine and the given language are recorded too.
     """
     token_lemmas = [tok.lemma_ for tok in doc]
     token_offsets = [tok.idx for tok in doc]
     artifacts = NlpArtifacts(
         entities=doc.ents,
         tokens=doc,
         tokens_indices=token_offsets,
         lemmas=token_lemmas,
         nlp_engine=self,
         language=language,
     )
     return artifacts
Exemplo n.º 6
0
    def test_text_with_context_improves_score(self):
        """Check that real NLP artifacts raise every result's score.

        Each entry of ``self.context_sentences`` is analyzed twice: once
        with empty artifacts and once with artifacts produced by the test
        NLP engine. Both runs must yield the same number of results, and
        every result must score strictly higher in the second run.
        """
        engine = TESTS_NLP_ENGINE
        empty_artifacts = NlpArtifacts([], [], [], [], None, "en")

        for item in self.context_sentences:
            text, recognizer, entities = item[0], item[1], item[2]
            real_artifacts = engine.process_text(text, "en")
            baseline = recognizer.analyze(text, entities, empty_artifacts)
            boosted = recognizer.analyze(text, entities, real_artifacts)

            assert len(baseline) == len(boosted)
            # Lengths were just asserted equal, so pairing is positional.
            for plain, enhanced in zip(baseline, boosted):
                assert plain.score < enhanced.score
Exemplo n.º 7
0
    def test_context_custom_recognizer(self):
        """Check that a custom pattern recognizer is also enhanced by context."""
        engine = TESTS_NLP_ENGINE
        empty_artifacts = NlpArtifacts([], [], [], [], None, "en")

        # Besides the basic context boost, this covers a specific case: the
        # pattern includes preceding whitespace (' rocket'), so the regex
        # match is misaligned with the token (which is just 'rocket').
        # That misalignment has to be handled for the correct context
        # window to be found.
        rocket_pattern = Pattern("rocketpattern", "\\s+(rocket)", 0.3)
        recognizer = PatternRecognizer(
            supported_entity="ROCKET",
            name="rocketrecognizer",
            context=["cool"],
            patterns=[rocket_pattern],
        )

        text = "hi, this is a cool ROCKET"
        entities = ["ROCKET"]
        real_artifacts = engine.process_text(text, "en")
        baseline = recognizer.analyze(text, entities, empty_artifacts)
        boosted = recognizer.analyze(text, entities, real_artifacts)

        assert len(baseline) == len(boosted)
        # Lengths were just asserted equal, so pairing is positional.
        for plain, enhanced in zip(baseline, boosted):
            assert plain.score < enhanced.score
Exemplo n.º 8
0
def mock_nlp_artifacts():
    """Return an ``NlpArtifacts`` built from four empty lists, ``None`` and ``"en"``."""
    empty_artifacts = NlpArtifacts([], [], [], [], None, "en")
    return empty_artifacts