Пример #1
0
 def build_regexes(self) -> None:
     """
     Build the patient and third-party regexes from their element lists.

     Sets ``self.re_patient`` and ``self.re_tp`` and flags
     ``self.regexes_built``. Either regex may still be ``None`` after
     building, so the flag (not the regex objects) records that the build
     has happened. When ``self.debug`` is set, both regex strings are
     written to the debug log.
     """
     self.re_patient = get_regex_from_elements(self.re_patient_elements)
     self.re_tp = get_regex_from_elements(self.re_tp_elements)
     self.regexes_built = True

     if not self.debug:
         return

     patient_pattern = self.get_patient_regex_string()
     log.debug("Patient scrubber: {}".format(patient_pattern))
     third_party_pattern = self.get_tp_regex_string()
     log.debug("Third party scrubber: {}".format(third_party_pattern))
Пример #2
0
 def build_regexes(self) -> None:
     """
     Build the patient and third-party regexes and mark them as built.

     The regex objects stored in ``self.re_patient`` / ``self.re_tp`` may
     be ``None`` even after this call; ``self.regexes_built`` is what
     records that building has taken place. In debug mode the resulting
     regex strings are logged.
     """
     self.re_patient = get_regex_from_elements(self.re_patient_elements)
     self.re_tp = get_regex_from_elements(self.re_tp_elements)
     self.regexes_built = True

     if not self.debug:
         return

     patient_pattern = self.get_patient_regex_string()
     log.debug(f"Patient scrubber: {patient_pattern}")
     third_party_pattern = self.get_tp_regex_string()
     log.debug(f"Third party scrubber: {third_party_pattern}")
Пример #3
0
 def build(self) -> None:
     """
     Prepare the fast matcher for the collected words.

     Depending on ``self.regex_method`` this is either a single regex
     (stored in ``self._regex``) or a FlashText keyword processor (stored
     in ``self._processor``, or ``None`` when there are no words). Intended
     to be called once all words have been collected. Sets ``self._built``
     on completion.
     """
     if self.regex_method:
         # Regex route: gather the elements for every word, then combine.
         regex_parts = []  # type: List[str]
         for word in self.words:
             word_parts = get_string_regex_elements(
                 word,
                 suffixes=self.suffixes,
                 at_word_boundaries_only=self.at_word_boundaries_only,
                 max_errors=self.max_errors,
             )
             regex_parts.extend(word_parts)
         log.debug(f"Building regex with {len(regex_parts)} elements")
         self._regex = get_regex_from_elements(regex_parts)
     elif self.words:
         # FlashText route: every keyword maps to the same replacement.
         self._processor = KeywordProcessorFixed(case_sensitive=False)
         self._processor.set_non_word_boundaries(FLASHTEXT_WORD_CHARACTERS)
         substitute = self.replacement_text
         log.debug(f"Building FlashText processor with "
                   f"{len(self.words)} keywords")
         for word in self.words:
             self._processor.add_keyword(word, substitute)
     else:
         # No words to scrub, so no processor is needed.
         self._processor = None  # type: Optional[KeywordProcessorFixed]
     self._built = True
Пример #4
0
 def build_regex(self) -> None:
     """
     Build ``self._regex`` from the regex elements of every word.

     Each word in ``self.words`` is expanded via
     ``get_string_regex_elements`` using this object's suffix,
     word-boundary and error-tolerance settings; the combined elements are
     turned into one regex and ``self._regex_built`` is set.
     """
     regex_parts = []
     for word in self.words:
         word_parts = get_string_regex_elements(
             word,
             suffixes=self.suffixes,
             at_word_boundaries_only=self.at_word_boundaries_only,
             max_errors=self.max_errors,
         )
         regex_parts.extend(word_parts)
     self._regex = get_regex_from_elements(regex_parts)
     self._regex_built = True
Пример #5
0
 def build_regex(self) -> None:
     """
     Build ``self._regex`` from the configured nonspecific patterns.

     Elements are gathered from ``get_uk_postcode_regex_elements`` (only
     when ``self.scrub_all_uk_postcodes`` is set) and from
     ``get_number_of_length_n_regex_elements`` for each length in
     ``self.scrub_all_numbers_of_n_digits``. They are then combined into a
     single regex and ``self._regex_built`` is set.
     """
     regex_parts = []
     if self.scrub_all_uk_postcodes:
         postcode_parts = get_uk_postcode_regex_elements(
             at_word_boundaries_only=(
                 self.anonymise_codes_at_word_boundaries_only))
         regex_parts.extend(postcode_parts)
     # noinspection PyTypeChecker
     for n_digits in self.scrub_all_numbers_of_n_digits:
         number_parts = get_number_of_length_n_regex_elements(
             n_digits,
             at_word_boundaries_only=(
                 self.anonymise_numbers_at_word_boundaries_only))
         regex_parts.extend(number_parts)
     self._regex = get_regex_from_elements(regex_parts)
     self._regex_built = True
Пример #6
0
 def build_regex(self) -> None:
     """
     Build the combined regex for nonspecific scrubbing.

     Gathers elements in this order: UK postcode elements (if
     ``self.scrub_all_uk_postcodes`` is set), elements for each digit
     count in ``self.scrub_all_numbers_of_n_digits``, then any
     ``self.extra_regexes``. The result is stored in ``self._regex`` and
     ``self._regex_built`` is set.
     """
     regex_parts = []  # type: List[str]
     if self.scrub_all_uk_postcodes:
         postcode_parts = get_uk_postcode_regex_elements(
             at_word_boundaries_only=(
                 self.anonymise_codes_at_word_boundaries_only))
         regex_parts.extend(postcode_parts)
     # noinspection PyTypeChecker
     for n_digits in self.scrub_all_numbers_of_n_digits:
         number_parts = get_number_of_length_n_regex_elements(
             n_digits,
             at_word_boundaries_only=(
                 self.anonymise_numbers_at_word_boundaries_only))
         regex_parts.extend(number_parts)
     if self.extra_regexes:
         # User-supplied patterns are appended after the generated ones.
         regex_parts.extend(self.extra_regexes)
     self._regex = get_regex_from_elements(regex_parts)
     self._regex_built = True