示例#1
0
 def peek_quoted_part(phrase: str, match: Match,
                      start_func: Callable[[str, Match, Match], int],
                      end_func: Callable[[str, Match, Match], int],
                      match_prob: int) -> List[PatternFound]:
     """
     :param phrase: the whole text, may be used for getting the definition's text length
     :param match: the matched part of the phrase that may contain several quote-packed definitions
     :param start_func: (phrase, match, quoted_match) -> definition's start
     :param end_func: (phrase, match, quoted_match) -> definition's end
     :param match_prob: definition's probability
     :return: a list of definitions found or an empty list
     """
     defs = []
     text = match.group()
     quoted_entries = [
         m for m in CommonDefinitionPatterns.reg_quoted.finditer(text)
     ]
     if len(quoted_entries) == 0:
         return defs
     for entry in quoted_entries:
         df = PatternFound()
         df.name = entry.group()
         df.start = start_func(phrase, match, entry)
         df.end = end_func(phrase, match, entry)
         df.probability = match_prob
         defs.append(df)
     return defs
示例#2
0
 def match_acronyms(phrase: str) -> List[PatternFound]:
     """
     :param phrase: rompió el silencio tras ser despedido del Canal del Fútbol (CDF).
     :return: {name: 'CDF', probability: 100, ...}
     """
     defs = []
     for match in CommonDefinitionPatterns.reg_acronyms.finditer(phrase):
         acr_start = CommonDefinitionPatterns.get_acronym_words_start(phrase, match)
         if acr_start < 0:
             continue
         df = PatternFound()
         df.name = match.group().strip('() ')
         df.start = acr_start
         df.end = match.start() - 1
         df.probability = 100
         defs.append(df)
     return defs
示例#3
0
    def match_es_def_by_semicolon(phrase: str) -> List[PatternFound]:
        """
        :param phrase: "Modern anatomy human": a human of modern anatomy.
        :return: {name: 'Modern anatomy human', probability: 100, ...}
        """
        prob = 100
        defs = []

        for match in CommonDefinitionPatterns.reg_semicolon.finditer(phrase):
            df = PatternFound()
            df.name = match.group()
            df.start = 0
            df.end = len(phrase)
            df.probability = prob
            defs.append(df)
            prob = 66

        return defs
示例#4
0
    def collect_regex_matches_with_quoted_chunks(
            phrase: str, reg: re, prob: int,
            quoted_def_start: Callable[[str, Match, Match], int],
            quoted_def_end: Callable[[str, Match, Match],
                                     int], def_start: Callable[[str, Match],
                                                               int],
            def_end: Callable[[str, Match], int]) -> List[PatternFound]:
        """
        First, find all matches by 'reg' ptr
        Second, go through matches
        For each match try to find a set of quoted words
        If found, use them as matches
        Or use the whole match
        :param quoted_def_start: (phrase, match, quoted_match) -> definition's start
        :param quoted_def_end: (phrase, match, quoted_match) -> definition's end
        :param def_start: (phrase, match) -> definition's start
        :param def_end: (phrase, match) -> definition's end
        :return:
        """
        defs = []
        for match in reg.finditer(phrase):
            quoted_matches = \
                CommonDefinitionPatterns.peek_quoted_part(phrase,
                                                          match,
                                                          quoted_def_start,
                                                          quoted_def_end,
                                                          prob)
            if len(quoted_matches) > 0:
                defs += quoted_matches
                continue

            df = PatternFound()
            df.name = match.group()
            df.start = def_start(phrase, match)
            df.end = def_end(phrase, match)
            df.probability = prob
            defs.append(df)

        return defs
示例#5
0
    def collect_regex_matches(
            phrase: str, reg: re, prob: int, def_start: Callable[[str, Match],
                                                                 int],
            def_end: Callable[[str, Match], int]) -> List[PatternFound]:
        """
        find all matches by 'reg' ptr
        :param quoted_def_start: (phrase, match, quoted_match) -> definition's start
        :param quoted_def_end: (phrase, match, quoted_match) -> definition's end
        :param def_start: (phrase, match) -> definition's start
        :param def_end: (phrase, match) -> definition's end
        :return:
        """
        defs = []
        for match in reg.finditer(phrase):

            df = PatternFound()
            df.name = match.group()
            df.start = def_start(phrase, match)
            df.end = def_end(phrase, match)
            df.probability = prob
            defs.append(df)

        return defs