Exemplo n.º 1
0
    def __init__(self,
                 registry=None,
                 nlp_engine=None,
                 app_tracer=None,
                 enable_trace_pii=False,
                 default_score_threshold=None,
                 use_recognizer_store=False,
                 default_language="en"):
        """
        Build an AnalyzerEngine, which orchestrates the detection of PII
        entities and all related logic.

        Any collaborator that is missing (falsy) is replaced by a default
        instance created here.

        :param registry: instance of type RecognizerRegistry; when omitted
        a default RecognizerRegistry is created
        :param nlp_engine: instance of type NlpEngine
        (for example SpacyNlpEngine); when omitted the engine registered
        under "spacy" in NLP_ENGINES is instantiated
        :param app_tracer: instance of type AppTracer,
        used to trace the logic used during each request; when omitted
        a new AppTracer is created
        :param enable_trace_pii: bool,
        defines whether PII values should be traced or not.
        :param default_score_threshold: Minimum confidence value
        for detected entities to be returned; any falsy value
        (None, 0) is stored as 0.0
        :param use_recognizer_store: Whether to call the
        Presidio Recognizer Store on every request to gather
        responses from custom recognizers as well
        (only applicable for the full Presidio service).
        Only consulted when no registry is supplied, in which case a
        RecognizerStoreApi is handed to the default registry.
        :param default_language: value stored as the engine's default
        language ("en" unless overridden)
        """
        # Resolve collaborators in a fixed order: NLP engine, registry,
        # tracer. Each default is only built when the caller gave none.
        if not nlp_engine:
            logger.info("nlp_engine not provided. "
                        "Creating new SpacyNlpEngine instance")
            spacy_engine_factory = NLP_ENGINES["spacy"]
            nlp_engine = spacy_engine_factory()

        if not registry:
            logger.info("Recognizer registry not provided. Creating "
                        "default RecognizerRegistry instance")
            # The store API is only wired in on explicit request.
            store_api = RecognizerStoreApi() if use_recognizer_store else None
            registry = RecognizerRegistry(recognizer_store_api=store_api)

        if not app_tracer:
            app_tracer = AppTracer()

        self.nlp_engine = nlp_engine
        self.registry = registry

        # A registry that has no recognizers yet gets the predefined set.
        if not registry.recognizers:
            registry.load_predefined_recognizers()

        self.app_tracer = app_tracer
        self.enable_trace_pii = enable_trace_pii
        # Falsy thresholds (None or 0) collapse to 0.0.
        self.default_score_threshold = default_score_threshold or 0.0
        self.default_language = default_language
Exemplo n.º 2
0
    def __init__(self,
                 registry=None,
                 nlp_engine=None,
                 app_tracer=None,
                 enable_trace_pii=False,
                 default_score_threshold=None):
        """
        Build an AnalyzerEngine, which orchestrates the detection of PII
        entities and all related logic.

        Any collaborator that is missing (falsy) is replaced by a default
        instance created here.

        :param registry: instance of type RecognizerRegistry; when omitted
        a default RecognizerRegistry is created
        :param nlp_engine: instance of type NlpEngine
        (for example SpacyNlpEngine); when omitted a SpacyNlpEngine
        is created
        :param app_tracer: instance of type AppTracer,
        used to trace the logic used during each request; when omitted
        a new AppTracer is created
        :param enable_trace_pii: bool,
        defines whether PII values should be traced or not.
        :param default_score_threshold: Minimum confidence value
        for detected entities to be returned; None is stored as 0
        """
        if not nlp_engine:
            logger.info("nlp_engine not provided. "
                        "Creating new SpacyNlpEngine instance")
            # Imported here, only on the path that needs the default engine.
            from presidio_analyzer.nlp_engine import SpacyNlpEngine
            nlp_engine = SpacyNlpEngine()

        if not registry:
            logger.info("Recognizer registry not provided. Creating "
                        "default RecognizerRegistry instance")
            # Imported here, only on the path that needs the default registry.
            from presidio_analyzer import RecognizerRegistry
            registry = RecognizerRegistry()

        if not app_tracer:
            app_tracer = AppTracer()

        self.nlp_engine = nlp_engine
        self.registry = registry

        # A registry that has no recognizers yet gets the predefined set.
        if not registry.recognizers:
            registry.load_predefined_recognizers()

        self.app_tracer = app_tracer
        self.enable_trace_pii = enable_trace_pii

        # Only None triggers the fallback; an explicit value is kept as-is.
        self.default_score_threshold = (
            0 if default_score_threshold is None else default_score_threshold
        )
Exemplo n.º 3
0
def test_when_add_recognizer_then_also_outputs_others(nlp_engine):
    """
    A custom recognizer added to a registry must coexist with the
    predefined ones: analyzing a sentence that mentions a person's name
    and the word "rocket" is expected to yield exactly two results.

    :param nlp_engine: NLP engine handed to the AnalyzerEngine
    (presumably supplied by a pytest fixture - confirm in conftest)
    """
    rocket_pattern = Pattern("rocket pattern", r"\W*(rocket)\W*", 0.8)
    rocket_recognizer = PatternRecognizer(
        "ROCKET",
        name="Rocket recognizer",
        patterns=[rocket_pattern],
        supported_language="en",
    )

    # Register the custom recognizer first, then load the predefined set.
    recognizer_registry = RecognizerRegistry()
    recognizer_registry.add_recognizer(rocket_recognizer)
    recognizer_registry.load_predefined_recognizers()
    assert len(recognizer_registry.recognizers) > 1

    engine = AnalyzerEngine(
        registry=recognizer_registry, nlp_engine=nlp_engine
    )
    findings = engine.analyze(text="Michael Jones has a rocket", language="en")

    assert len(findings) == 2
Exemplo n.º 4
0
    def __init__(
        self,
        registry: RecognizerRegistry = None,
        nlp_engine: NlpEngine = None,
        app_tracer: AppTracer = None,
        log_decision_process: bool = False,
        default_score_threshold: float = 0,
        supported_languages: List[str] = None,
    ):
        """
        Build an AnalyzerEngine from the given collaborators, creating a
        default for every one that is missing (falsy).

        :param registry: instance of type RecognizerRegistry; when omitted
        a default RecognizerRegistry is created
        :param nlp_engine: instance of type NlpEngine; when omitted one is
        obtained from NlpEngineProvider().create_engine()
        :param app_tracer: instance of type AppTracer; when omitted a new
        AppTracer is created
        :param log_decision_process: bool, stored on the instance unchanged
        :param default_score_threshold: stored on the instance unchanged
        (0 unless overridden)
        :param supported_languages: list of language codes; an empty or
        missing list is replaced by ["en"]
        """
        # Resolve every default first, in the same order as the parameters
        # are consumed below: languages, NLP engine, registry, tracer.
        supported_languages = supported_languages or ["en"]

        if not nlp_engine:
            logger.info("nlp_engine not provided, creating default.")
            nlp_engine = NlpEngineProvider().create_engine()

        if not registry:
            logger.info("registry not provided, creating default.")
            registry = RecognizerRegistry()

        if not app_tracer:
            app_tracer = AppTracer()

        self.app_tracer = app_tracer
        self.supported_languages = supported_languages
        self.nlp_engine = nlp_engine
        self.registry = registry

        # A registry that has no recognizers yet gets the predefined set,
        # loaded for this engine's NLP engine and supported languages.
        if not registry.recognizers:
            registry.load_predefined_recognizers(
                nlp_engine=self.nlp_engine,
                languages=self.supported_languages,
            )

        self.log_decision_process = log_decision_process
        self.default_score_threshold = default_score_threshold