示例#1
0
    def test_multi_group(self):
        """
        Runs ``trim_empty_matches`` over the matches of a pattern with three
        capturing groups (the third one is named ``cd``) and checks which
        matches remain for different group selections.
        """
        to_comparable = TrimEmptyMatchesTest.comparable_map

        pattern = "A(.*?)B(.*?)C(?P<cd>.*?)D"
        haystack = ("A1B2C3D no match.'~ Awhat doByouCthink??D ABisCD ABCD"
                    "AneverBCmindD  __ ABCXD ABC")

        # A tuple can be handed to every assertion, unlike the one-shot
        # iterator returned by re.finditer.
        found = tuple(re.finditer(pattern, haystack))

        everything = ("A1B2C3D", "Awhat doByouCthink??D", "ABisCD", "ABCD",
                      "AneverBCmindD", "ABCXD")
        kept_for_1_and_3 = ("A1B2C3D", "Awhat doByouCthink??D",
                            "AneverBCmindD", "ABCXD")
        kept_for_2 = ("A1B2C3D", "Awhat doByouCthink??D", "ABisCD")
        kept_for_cd = ("A1B2C3D", "Awhat doByouCthink??D", "AneverBCmindD",
                       "ABCXD")

        # Sanity check of the pattern itself.
        self.assertEqual(to_comparable(found), everything)

        # Without an explicit group selection no match is expected to vanish.
        self.assertEqual(to_comparable(trim_empty_matches(found)),
                         everything)

        # "ABisCD" and "ABCD" leave group 1 as well as group 3 empty.
        self.assertEqual(to_comparable(trim_empty_matches(found, (1, 3))),
                         kept_for_1_and_3)

        self.assertEqual(to_comparable(trim_empty_matches(found, (2,))),
                         kept_for_2)

        # Groups can also be selected by name.
        self.assertEqual(to_comparable(trim_empty_matches(found, ("cd",))),
                         kept_for_cd)
示例#2
0
    def test_single_group(self):
        """
        Runs ``trim_empty_matches`` over the matches of a pattern with a
        single capturing group and checks which matches remain for different
        group selections.
        """
        to_comparable = TrimEmptyMatchesTest.comparable_map

        pattern = "A(.*?)B"
        haystack = "AHelloB   s A B ABAB do what you want."

        # Materialize the matches once; a bare iterator would have to be
        # re-created via re.finditer in front of every single assert.
        found = tuple(re.finditer(pattern, haystack))

        everything = ("AHelloB", "A B", "AB", "AB")
        group_one_filled = ("AHelloB", "A B")

        # Make sure the pattern behaves as expected.
        self.assertEqual(to_comparable(found), everything)

        # Default mode checks for group 0.
        self.assertEqual(to_comparable(trim_empty_matches(found)),
                         everything)

        self.assertEqual(to_comparable(trim_empty_matches(found, (1,))),
                         group_one_filled)

        # As soon as group 0 is part of the selection every match is
        # expected to stay, independent of the order of the selection.
        self.assertEqual(to_comparable(trim_empty_matches(found, (0, 1))),
                         everything)

        self.assertEqual(to_comparable(trim_empty_matches(found, (1, 0))),
                         everything)
    def test_multi_group(self):
        """
        Checks ``trim_empty_matches`` with a three-group pattern whose last
        group is named ``cd``, selecting groups by index and by name.
        """
        to_comparable = TrimEmptyMatchesTest.comparable_map

        # Keep the matches in a tuple so each assertion can consume them.
        found = tuple(re.finditer(
            "A(.*?)B(.*?)C(?P<cd>.*?)D",
            "A1B2C3D no match.'~ Awhat doByouCthink??D ABisCD ABCD"
            "AneverBCmindD  __ ABCXD ABC"))

        untrimmed = ("A1B2C3D", "Awhat doByouCthink??D", "ABisCD", "ABCD",
                     "AneverBCmindD", "ABCXD")

        # Check again if our regex works.
        self.assertEqual(to_comparable(found), untrimmed)

        # Each entry: (arguments following the matches, expected survivors).
        cases = (
            ((), untrimmed),
            (((1, 3), ),
             ("A1B2C3D", "Awhat doByouCthink??D", "AneverBCmindD", "ABCXD")),
            (((2, ), ),
             ("A1B2C3D", "Awhat doByouCthink??D", "ABisCD")),
            ((("cd", ), ),
             ("A1B2C3D", "Awhat doByouCthink??D", "AneverBCmindD", "ABCXD")),
        )

        for extra_args, survivors in cases:
            self.assertEqual(
                to_comparable(trim_empty_matches(found, *extra_args)),
                survivors)
    def test_single_group(self):
        """
        Checks ``trim_empty_matches`` with a one-group pattern, using the
        default selection as well as explicit group selections.
        """
        to_comparable = TrimEmptyMatchesTest.comparable_map

        # Using the iterator itself would require invoking re.finditer again
        # and again before each assert, so store the matches in a tuple.
        found = tuple(re.finditer("A(.*?)B",
                                  "AHelloB   s A B ABAB do what you want."))

        untrimmed = ("AHelloB", "A B", "AB", "AB")

        # Ensure our regex is working as expected.
        self.assertEqual(to_comparable(found), untrimmed)

        # Each entry: (arguments following the matches, expected survivors).
        # The first entry exercises the default mode, which checks group 0.
        cases = (
            ((), untrimmed),
            (((1, ), ), ("AHelloB", "A B")),
            (((0, 1), ), untrimmed),
            (((1, 0), ), untrimmed),
        )

        for extra_args, survivors in cases:
            self.assertEqual(
                to_comparable(trim_empty_matches(found, *extra_args)),
                survivors)
示例#5
0
def search_in_between(begin, end, string, max_matches=0, remove_empty_matches=False, use_regex=False):
    """
    Looks for text that is enclosed by a begin- and an end-sequence.
    Line breaks inside the enclosed text belong to the result as well (the
    search runs with ``re.DOTALL``). Escape sequences are not handled.

    :param begin:                A pattern that defines where to start
                                 matching.
    :param end:                  A pattern that defines where to end matching.
    :param string:               The string where to search in.
    :param max_matches:          Defines the maximum number of matches. If 0 or
                                 less is provided, the number of matches is not
                                 limited.
    :param remove_empty_matches: Defines whether empty entries should
                                 be removed from the result. An entry is
                                 considered empty if no inner match was
                                 performed (regardless of matched start and
                                 end patterns).
    :param use_regex:            Specifies whether to treat the begin and end
                                 patterns as regexes or simple strings.
    :return:                     An iterator returning InBetweenMatch objects
                                 that hold information about the matched begin,
                                 inside and end string matched.
    """
    if use_regex:
        # Capturing groups inside the begin pattern shift the index of every
        # group that follows it, so they have to be counted.
        groups_in_begin = re.compile(begin).groups
    else:
        begin = re.escape(begin)
        end = re.escape(end)
        # An escaped pattern can not contain capturing groups.
        groups_in_begin = 0

    # Group 1 wraps the begin pattern, followed by the groups of the begin
    # pattern itself, the enclosed text and finally the end pattern.
    inside_index = groups_in_begin + 2
    end_index = groups_in_begin + 3

    # The lazy `(.*?)` between both sequences stops at the next occurring
    # end-sequence.
    begin_part = "(" + begin + ")"
    end_part = "(" + end + ")"
    pattern = begin_part + "(.*?)" + end_part

    found = re.finditer(pattern, string, re.DOTALL)

    if remove_empty_matches:
        found = trim_empty_matches(found, (inside_index,))

    for hit in limit(found, max_matches):
        yield InBetweenMatch.from_values(
            hit.group(1),
            hit.start(1),
            hit.group(inside_index),
            hit.start(inside_index),
            hit.group(end_index),
            hit.start(end_index),
        )
示例#6
0
文件: Core.py 项目: waffle-iron/coala
def unescaped_search_in_between(begin,
                                end,
                                string,
                                max_matches=0,
                                remove_empty_matches=False,
                                use_regex=False):
    """
    Looks for text that is enclosed by an unescaped begin- and an unescaped
    end-sequence. Line breaks inside the enclosed text belong to the result
    as well (the search runs with ``re.DOTALL``).
    Begin- and end-sequences that are escaped with a backslash are skipped.

    .. warning::

        Using the escape character '\\' in the begin- or end-sequences
        the function can return strange results. The backslash can
        interfere with the escaping regex-sequence used internally to
        match the enclosed string.

    :param begin:                A pattern that defines where to start
                                 matching.
    :param end:                  A pattern that defines where to end
                                 matching.
    :param string:               The string where to search in.
    :param max_matches:          Defines the maximum number of matches. If 0 or
                                 less is provided, the number of matches is not
                                 limited.
    :param remove_empty_matches: Defines whether empty entries should
                                 be removed from the result. An entry is
                                 considered empty if no inner match was
                                 performed (regardless of matched start and
                                 end patterns).
    :param use_regex:            Specifies whether to treat the begin and end
                                 patterns as regexes or simple strings.
    :return:                     An iterator returning the objects created by
                                 ``InBetweenMatch.from_values`` for the
                                 matched begin, inside and end strings.
    """
    if use_regex:
        # Capturing groups inside the begin pattern shift the index of every
        # group that follows it, so they have to be counted.
        groups_in_begin = re.compile(begin).groups
    else:
        begin = re.escape(begin)
        end = re.escape(end)
        # An escaped pattern can not contain capturing groups.
        groups_in_begin = 0

    # Indices of the groups following the begin pattern and its own groups.
    inside_index = groups_in_begin + 2
    escapes_index = groups_in_begin + 3
    end_index = groups_in_begin + 4

    # Pieces of the final pattern:
    # head:   `(?<!\\)(?:\\\\)*` only lets the begin sequence match when it
    #         is preceded by an even number of backslashes, i.e. when it is
    #         not escaped. The begin sequence itself is captured in group 1.
    # middle: `(.*?)` lazily captures the enclosed text, followed by the
    #         same unescaping construct, which this time captures the
    #         backslashes in front of the end sequence.
    # tail:   A capturing group for the end sequence. Because the enclosed
    #         text is matched lazily, the next occurring unescaped
    #         end-sequence is taken.
    head = r"(?<!\\)(?:\\\\)*(" + begin + ")"
    middle = r"(.*?)(?<!\\)((?:\\\\)*)"
    tail = "(" + end + ")"
    pattern = head + middle + tail

    found = re.finditer(pattern, string, re.DOTALL)

    if remove_empty_matches:
        found = trim_empty_matches(found, (inside_index, escapes_index))

    for hit in limit(found, max_matches):
        # The captured backslashes in front of the end sequence are appended
        # to the enclosed text.
        inside = hit.group(inside_index) + hit.group(escapes_index)
        yield InBetweenMatch.from_values(
            hit.group(1), hit.start(1),
            inside, hit.start(inside_index),
            hit.group(end_index), hit.start(end_index))
示例#7
0
文件: Core.py 项目: waffle-iron/coala
def search_in_between(begin,
                      end,
                      string,
                      max_matches=0,
                      remove_empty_matches=False,
                      use_regex=False):
    """
    Searches ``string`` for text surrounded by a begin- and an end-sequence.
    The search runs with ``re.DOTALL``, so enclosed line breaks end up in the
    result too. Escape sequences are not handled.

    :param begin:                A pattern that defines where to start
                                 matching.
    :param end:                  A pattern that defines where to end matching.
    :param string:               The string where to search in.
    :param max_matches:          Defines the maximum number of matches. If 0 or
                                 less is provided, the number of matches is not
                                 limited.
    :param remove_empty_matches: Defines whether empty entries should
                                 be removed from the result. An entry is
                                 considered empty if no inner match was
                                 performed (regardless of matched start and
                                 end patterns).
    :param use_regex:            Specifies whether to treat the begin and end
                                 patterns as regexes or simple strings.
    :return:                     An iterator returning InBetweenMatch objects
                                 that hold information about the matched begin,
                                 inside and end string matched.
    """
    # Number of capturing groups contributed by the begin pattern itself.
    # Plain strings get escaped and therefore contribute none; for regexes
    # the begin pattern is compiled just to read its group count.
    offset = 0
    if use_regex:
        offset = re.compile(begin).groups
    else:
        begin = re.escape(begin)
        end = re.escape(end)

    # Layout of the assembled pattern:
    #   (begin)  group 1, plus `offset` groups nested inside of it
    #   (.*?)    the enclosed text, matched as short as possible
    #   (end)    the next occurring end-sequence
    opening = "(" + begin + ")"
    closing = "(" + end + ")"
    hits = re.finditer(opening + "(.*?)" + closing, string, re.DOTALL)

    inside_group, end_group = offset + 2, offset + 3

    if remove_empty_matches:
        hits = trim_empty_matches(hits, (inside_group, ))

    hits = limit(hits, max_matches)

    for hit in hits:
        begin_text, begin_pos = hit.group(1), hit.start(1)
        inside_text = hit.group(inside_group)
        inside_pos = hit.start(inside_group)
        end_text, end_pos = hit.group(end_group), hit.start(end_group)

        yield InBetweenMatch.from_values(begin_text, begin_pos,
                                         inside_text, inside_pos,
                                         end_text, end_pos)
示例#8
0
def unescaped_search_in_between(begin, end, string, max_matches=0, remove_empty_matches=False, use_regex=False):
    """
    Searches ``string`` for text surrounded by a begin- and an end-sequence
    that are both unescaped. The search runs with ``re.DOTALL``, so enclosed
    line breaks end up in the result too.
    Sequences escaped with a backslash are not treated as begin or end.

    .. warning::

        Using the escape character '\\' in the begin- or end-sequences
        the function can return strange results. The backslash can
        interfere with the escaping regex-sequence used internally to
        match the enclosed string.

    :param begin:                A pattern that defines where to start
                                 matching.
    :param end:                  A pattern that defines where to end
                                 matching.
    :param string:               The string where to search in.
    :param max_matches:          Defines the maximum number of matches. If 0 or
                                 less is provided, the number of matches is not
                                 limited.
    :param remove_empty_matches: Defines whether empty entries should
                                 be removed from the result. An entry is
                                 considered empty if no inner match was
                                 performed (regardless of matched start and
                                 end patterns).
    :param use_regex:            Specifies whether to treat the begin and end
                                 patterns as regexes or simple strings.
    :return:                     An iterator returning the objects created by
                                 ``InBetweenMatch.from_values`` for the
                                 matched begin, inside and end strings.
    """
    # Number of capturing groups contributed by the begin pattern itself.
    # Plain strings get escaped and therefore contribute none; for regexes
    # the begin pattern is compiled just to read its group count.
    offset = 0
    if use_regex:
        offset = re.compile(begin).groups
    else:
        begin = re.escape(begin)
        end = re.escape(end)

    # `(?<!\\)(?:\\\\)*` accepts only an even number of backslashes in front
    # of the following sequence, which means that sequence is not escaped.
    # In front of the end sequence these backslashes are captured, since
    # they are part of the enclosed text.
    opening = r"(?<!\\)(?:\\\\)*(" + begin + ")"
    enclosed = "(.*?)"
    closing = r"(?<!\\)((?:\\\\)*)(" + end + ")"

    # The enclosed text is matched lazily, so the next occurring unescaped
    # end-sequence closes the match.
    hits = re.finditer(opening + enclosed + closing, string, re.DOTALL)

    inside_group, escapes_group, end_group = offset + 2, offset + 3, offset + 4

    if remove_empty_matches:
        hits = trim_empty_matches(hits, (inside_group, escapes_group))

    hits = limit(hits, max_matches)

    for hit in hits:
        begin_text, begin_pos = hit.group(1), hit.start(1)
        # Enclosed text plus the backslashes captured right before the end.
        inside_text = hit.group(inside_group) + hit.group(escapes_group)
        inside_pos = hit.start(inside_group)
        end_text, end_pos = hit.group(end_group), hit.start(end_group)

        yield InBetweenMatch.from_values(
            begin_text,
            begin_pos,
            inside_text,
            inside_pos,
            end_text,
            end_pos,
        )