def _produce_separator_split_token(
        self, remainder: int, word: str, mo: regex, prefix: str, offset: int
) -> Generator[Token, None, str]:
    """Helper method to handle alnum words with `_separation` patterns.

    Emits the tokens produced by one separator match ``mo`` inside ``word``:

    * the still-unemitted text ``word[remainder:mo.start()]``, if any;
    * the separator itself, when it is non-empty and ``self._can_emit``
      accepts it.

    Special case: when the separator fully matches
    ``Tokenizer._apostrophe_t`` and the character right before it is ``n``,
    that ``n`` is detached from the preceding text and emitted together
    with the separator as one token (or as the literal ``"not"`` when
    ``self.replace_not_contraction`` is set).

    :param remainder: index in ``word`` of the first character that has not
        been emitted yet
    :param word: the word being split
    :param mo: match object of the separator found in ``word``
    :param prefix: first argument for the next emitted ``Token``; it is
        reset to ``""`` once a token has consumed it
    :param offset: value added to in-word indices to build each token's
        offset (presumably the position of ``word`` in the source text)
    :return: generator return value -- ``""`` when the separator was
        emitted as (part of) a token, otherwise ``prefix + separator`` so
        that the caller can carry the unemitted separator forward
    """
    start = mo.start()
    separator = mo.group(0)

    if start > remainder:
        if Tokenizer._apostrophe_t.fullmatch(separator) and word[start - 1] == 'n':
            # The contraction token begins at the 'n', one character before
            # the separator match.
            contraction_start = start - 1

            if remainder < contraction_start:
                yield Token(prefix, word[remainder:contraction_start], offset + remainder)
                prefix = ""

            # Fix: report the offset of the 'n' (start - 1), not of the
            # apostrophe; the token's text starts at the 'n' and the
            # preceding token ends right before it.
            yield Token(
                prefix,
                "not" if self.replace_not_contraction else 'n' + separator,
                offset + contraction_start,
            )
            return ""

        yield Token(prefix, word[remainder:start], offset + remainder)
        prefix = ""

    if separator and self._can_emit(separator):
        yield Token(prefix, separator, offset + start)
        return ""

    # Separator not emitted: hand it back (with any pending prefix).
    return prefix + separator
def collect_regex_matches_with_quoted_chunks(
        phrase: str,
        reg: re,
        prob: int,
        quoted_def_start: Callable[[str, Match, Match], int],
        quoted_def_end: Callable[[str, Match, Match], int],
        def_start: Callable[[str, Match], int],
        def_end: Callable[[str, Match], int]) -> List[PatternFound]:
    """
    Collect definitions for every match of 'reg' in 'phrase', preferring
    the quoted parts of a match over the match as a whole.

    For each match, CommonDefinitionPatterns.peek_quoted_part is asked for
    quoted chunks. If it returns any, those are added to the result;
    otherwise a single PatternFound covering the whole match is added.

    :param phrase: text to search in
    :param reg: compiled pattern, used through its finditer() method
    :param prob: probability assigned to whole-match definitions and passed
                 on to peek_quoted_part
    :param quoted_def_start: (phrase, match, quoted_match) -> definition's start
    :param quoted_def_end: (phrase, match, quoted_match) -> definition's end
    :param def_start: (phrase, match) -> definition's start
    :param def_end: (phrase, match) -> definition's end
    :return: definitions found, in match order
    """
    found = []
    for hit in reg.finditer(phrase):
        quoted = CommonDefinitionPatterns.peek_quoted_part(
            phrase, hit, quoted_def_start, quoted_def_end, prob)

        if len(quoted) == 0:
            # No quoted chunks inside this match: use the match as a whole.
            whole = PatternFound()
            whole.name = hit.group()
            whole.start = def_start(phrase, hit)
            whole.end = def_end(phrase, hit)
            whole.probability = prob
            found.append(whole)
        else:
            found.extend(quoted)
    return found
def collect_regex_matches(
        phrase: str,
        reg: re,
        prob: int,
        def_start: Callable[[str, Match], int],
        def_end: Callable[[str, Match], int]) -> List[PatternFound]:
    """
    Build one PatternFound for every match of 'reg' in 'phrase'.

    :param phrase: text to search in
    :param reg: compiled pattern, used through its finditer() method
    :param prob: probability assigned to every definition found
    :param def_start: (phrase, match) -> definition's start
    :param def_end: (phrase, match) -> definition's end
    :return: definitions found, in match order
    """
    def to_pattern(hit):
        # Name is the full matched text; bounds come from the callbacks.
        item = PatternFound()
        item.name = hit.group()
        item.start = def_start(phrase, hit)
        item.end = def_end(phrase, hit)
        item.probability = prob
        return item

    return [to_pattern(hit) for hit in reg.finditer(phrase)]