示例#1
0
+--------+------------------------------------------------------+
| code   | meaning                                              |
+========+======================================================+
| ^      | Start of string, or line                             |
+--------+------------------------------------------------------+
| $      | End of string, or line                               |
+--------+------------------------------------------------------+
| \A     | Start of string                                      |
+--------+------------------------------------------------------+
| \Z     | End of string                                        |
+--------+------------------------------------------------------+
| \b     | Empty string at the beginning or end of a word       |
+--------+------------------------------------------------------+
| \B     | Empty string NOT at the beginning or end of a word   |
+--------+------------------------------------------------------+

"""

from py_09_rePatterns import test_patterns

if __name__ == "__main__":
    # Run every anchoring code from the table above against one sentence.
    sample_sentence = 'This is some text -- with punctuation.'

    anchoring_cases = [
        # Anchored to the beginning of the input.
        (r'^\w+', 'word at start of string'),
        (r'\A\w+', 'word at start of string'),
        # Anchored to the end of the input; \S* absorbs trailing punctuation.
        (r'\w+\S*$', 'word near end of string'),
        (r'\w+\S*\Z', 'word near end of string'),
        # Unanchored baseline for comparison with the \b / \B variants.
        (r'\w*t\w*', 'word containing t'),
        # Word-boundary (\b) and non-boundary (\B) assertions.
        (r'\bt\w+', 't at start of word'),
        (r'\w+t\b', 't at end of word'),
        (r'\Bt\B', 't, not start or end of word'),
    ]

    test_patterns(anchoring_cases, sample_sentence)
示例#2
0
| code   | meaning                               |
+========+=======================================+
| \d     | A digit                               |
+--------+---------------------------------------+
| \D     | A non-digit                           |
+--------+---------------------------------------+
| \s     | Whitespace (tab, space, newline, etc.)|
+--------+---------------------------------------+
| \S     | Non-whitespace                        |
+--------+---------------------------------------+
| \w     | Alphanumeric                          |
+--------+---------------------------------------+
| \W     | Non-alphanumeric                      |
+--------+---------------------------------------+

"""

from py_09_rePatterns import test_patterns

if __name__ == "__main__":
    # One pattern per escape code, each repeated with + so a whole run of
    # matching characters is consumed at once.
    escape_code_cases = [
        (r'\d+', 'sequence of digits'),
        (r'\D+', 'sequence of non-digits'),
        (r'\s+', 'sequence of whitespace'),
        (r'\S+', 'sequence of non-whitespace'),
        (r'\w+', 'alphanumeric characters'),
        (r'\W+', 'non-alphanumeric'),
    ]
    # The sample mixes letters, a digit, spaces and punctuation.
    sample_phrase = 'A prime #1 example!'

    test_patterns(escape_code_cases, sample_phrase)
示例#3
0
        r'(?P<ends_with_t>\w+t)\b',
    ]

    for pattern in patterns:
        regex = re.compile(pattern)
        match = regex.search(text)
        print("'{}'".format(pattern))
        print('  ', match.groups())
        print('  ', match.groupdict())
        print()


if __name__ == "__main__":
    test_patterns([
        ('a(ab)', 'a followed by literal ab'),
        ('a(a*b*)', 'a followed by 0-n a and 0-n b'),
        ('a(ab)*', 'a followed by 0-n ab'),
        ('a(ab)+', 'a followed by 1-n ab'),
    ], 'abbaaabbbbaaaaa')
    # match.groups()
    re_groups_match()
    # index reference: match.group(index)
    re_group_individual()
    # name reference: match.groupdict()
    re_groups_named()
    # match.groupdict()
    test_patterns_updated_ver(
        [(r'a((a*)(b*))', 'a followed by optional 0-n a and 0-n b')],
        'abbaabbba')
    # Groups are also useful for specifying alternative patterns, using the pipe symbol (|).
    test_patterns_updated_ver([
        (r'a((a+)|(b+))', 'a then seq. of a or seq. of b'),
示例#4
0
from py_09_rePatterns import test_patterns

if __name__ == "__main__":
    # Two sample inputs are shared by the demonstrations below.
    ab_sample = 'abbaabbba'
    sentence_sample = 'This is some text -- with punctuation.'

    # Character sets: [ab] matches a single character that is a or b.
    charset_cases = [
        ('[ab]', 'either a or b'),
        ('a[ab]+', 'a followed by 1 or more a or b'),
        ('a[ab]+?', 'a followed by 1 or more a or b, not greedy'),
    ]
    test_patterns(charset_cases, ab_sample)

    # Negated sets: a leading ^ inside the brackets excludes the listed
    # characters.
    negated_cases = [
        ('[^-. ]+', 'sequences without -, ., or space'),
    ]
    test_patterns(negated_cases, sentence_sample)

    # Ranges: a-z / A-Z stand for every character between the endpoints.
    range_cases = [
        ('[a-z]+', 'sequences of lowercase letters'),
        ('[A-Z]+', 'sequences of uppercase letters'),
        ('[a-zA-Z]+', 'sequences of lower- or uppercase letters'),
        ('[A-Z][a-z]+', 'one uppercase followed by lowercase'),
    ]
    test_patterns(range_cases, sentence_sample)

    # Dot: matches any single character; the last two cases contrast the
    # greedy (.*) and non-greedy (.*?) forms.
    dot_cases = [
        ('a.', 'a followed by any one character'),
        ('b.', 'b followed by any one character'),
        ('a.*b', 'a followed by anything, ending in b'),
        ('a.*?b', 'a followed by anything, ending in b'),
    ]
    test_patterns(dot_cases, ab_sample)
示例#5
0
# import sys, os
# sys.path.append(os.path.dirname(os.path.dirname(__file__)))
# from pkg.breaker import addBreaker
from py_09_rePatterns import test_patterns, text

if __name__ == "__main__":
    # Demonstrate the repetition quantifiers (*, +, ?, {m}, {m,n}) on a short
    # string of a's and b's, first in their default greedy form and then in
    # the non-greedy form.
    #
    # NOTE: the range quantifier must be written without whitespace.
    # Python's re module does not recognise 'b{2, 3}' as a quantifier; it
    # matches the literal characters "{2, 3}" instead, so the pattern would
    # never match the sample text.

    # Default behavior is greedy: consume as much input as possible.
    test_patterns(
        [
            ('ab*', 'a followed by zero or more b'),
            ('ab+', 'a followed by one or more b'),
            ('ab?', 'a followed by zero or one b'),
            ('ab{3}', 'a followed by three b'),
            ('ab{2,3}', 'a followed by two or three b'),
        ],
        text='abbaabbba'
    )
    # Non-greedy: follow the repetition instruction with ? so that it
    # consumes as little input as possible.
    test_patterns(
        [
            ('ab*?', 'a followed by zero or more b'),
            ('ab+?', 'a followed by one or more b'),
            ('ab??', 'a followed by zero or one b'),
            ('ab{3}?', 'a followed by three b'),
            ('ab{2,3}?', 'a followed by two or three b'),
        ],
        text='abbaabbba'
    )