Пример #1
0
 def __init__(
     self,
     whitelist_categories=None,
     blacklist_categories=None,
     blacklist_characters=None,
     min_codepoint=None,
     max_codepoint=None,
 ):
     """Work out which code points are permitted for this instance.

     The category and code-point constraints are forwarded to
     ``charmap.query``; the result is wrapped in an ``IntervalSet`` and
     stored as ``self.intervals``.  ``self.blacklist_characters`` keeps
     only those blacklisted characters whose code point is contained in
     ``self.intervals``.

     Also sets ``self.zero_point`` (``index_above`` of ``ord('0')``) and
     ``self.special``, a list holding the index of the newline code point
     when newline is not blacklisted and ``index`` can locate it.

     Raises:
         InvalidArgument: if the query result is empty, or if the retained
             blacklist is as large as ``self.intervals``.
     """
     queried = charmap.query(
         include_categories=whitelist_categories,
         exclude_categories=blacklist_categories,
         min_codepoint=min_codepoint,
         max_codepoint=max_codepoint,
     )
     if not queried:
         raise InvalidArgument('No valid characters in set')
     self.intervals = IntervalSet(queried)

     if not blacklist_characters:
         self.blacklist_characters = set()
     else:
         # Blacklisted characters outside the intervals are irrelevant, so
         # only the ones that would actually remove something are kept.
         rejected = {
             ch for ch in blacklist_characters
             if ord(ch) in self.intervals
         }
         self.blacklist_characters = rejected
         if len(rejected) == len(self.intervals):
             raise InvalidArgument('No valid characters in set')

     self.zero_point = self.intervals.index_above(ord('0'))

     self.special = []
     if '\n' not in self.blacklist_characters:
         try:
             newline_index = self.intervals.index(ord('\n'))
         except ValueError:
             # A ValueError from ``index`` is deliberately ignored; in
             # that case ``special`` simply stays empty.
             pass
         else:
             self.special.append(newline_index)
Пример #2
0
 def __init__(self, whitelist_categories=None, blacklist_categories=None,
              blacklist_characters=None, min_codepoint=None,
              max_codepoint=None):
     """Initialise the interval set, blacklist and special indices.

     ``charmap.query`` is called with the category and code-point
     constraints; an empty result raises ``InvalidArgument``.  Otherwise
     the result becomes ``self.intervals`` (an ``IntervalSet``).

     Blacklisted characters are kept in ``self.blacklist_characters`` only
     when their code point is in ``self.intervals``; if that retained set
     has the same length as ``self.intervals``, ``InvalidArgument`` is
     raised as well.
     """
     available = charmap.query(
         include_categories=whitelist_categories,
         exclude_categories=blacklist_categories,
         min_codepoint=min_codepoint,
         max_codepoint=max_codepoint,
     )
     if not available:
         raise InvalidArgument('No valid characters in set')
     self.intervals = IntervalSet(available)

     if blacklist_characters:
         kept = set()
         for char in blacklist_characters:
             if ord(char) in self.intervals:
                 kept.add(char)
         self.blacklist_characters = kept
         if len(kept) == len(self.intervals):
             raise InvalidArgument('No valid characters in set')
     else:
         self.blacklist_characters = set()

     self.zero_point = self.intervals.index_above(ord('0'))

     # ``special`` receives the index of the newline code point unless
     # newline is blacklisted or ``index`` raises ValueError for it.
     self.special = []
     newline_blacklisted = '\n' in self.blacklist_characters
     if not newline_blacklisted:
         try:
             self.special.append(self.intervals.index(ord('\n')))
         except ValueError:
             pass
Пример #3
0
def test_query_matches_categories_codepoints(exclude, include, m1, m2):
    """Every range returned by ``cm.query`` stays inside the given bounds.

    ``m1`` and ``m2`` may arrive in either order; they are sorted before
    being used as the minimum and maximum code point.
    """
    lower, upper = sorted((m1, m2))
    ranges = cm.query(
        exclude, include, min_codepoint=lower, max_codepoint=upper
    )
    assert_valid_range_list(ranges)
    for start, stop in ranges:
        assert lower <= start
        assert stop <= upper
Пример #4
0
 def __init__(
     self,
     whitelist_categories=None,
     blacklist_categories=None,
     blacklist_characters=None,
     min_codepoint=None,
     max_codepoint=None,
     whitelist_characters=None,
 ):
     """Initialise ``intervals``, ``whitelist_characters`` and ``zero_point``.

     All six constraints are handed to ``charmap.query``.  A falsy result
     raises ``InvalidArgument``; otherwise it is wrapped in an
     ``IntervalSet`` and stored on ``self.intervals``.
     ``self.whitelist_characters`` is the set of whitelisted characters
     (empty when none were supplied).
     """
     matched = charmap.query(
         include_categories=whitelist_categories,
         exclude_categories=blacklist_categories,
         min_codepoint=min_codepoint,
         max_codepoint=max_codepoint,
         include_characters=whitelist_characters,
         exclude_characters=blacklist_characters,
     )
     if not matched:
         raise InvalidArgument('No valid characters in set')
     self.intervals = IntervalSet(matched)
     self.whitelist_characters = (
         set(whitelist_characters) if whitelist_characters else set()
     )
     self.zero_point = self.intervals.index_above(ord('0'))
Пример #5
0
 def __init__(self,
              whitelist_categories=None,
              blacklist_categories=None,
              blacklist_characters=None,
              min_codepoint=None,
              max_codepoint=None,
              whitelist_characters=None):
     """Initialise ``self.intervals`` and ``self.zero_point``.

     Both category arguments are asserted to be subsets of
     ``charmap.categories()``.  The constraints are then passed to
     ``charmap.query`` and the result is stored as an ``IntervalSet``.

     Raises:
         InvalidArgument: if the query result is empty.  The message lists
             every argument that was not ``None``.
     """
     for requested in (whitelist_categories, blacklist_categories):
         assert set(requested or ()).issubset(charmap.categories())

     allowed = charmap.query(
         include_categories=whitelist_categories,
         exclude_categories=blacklist_categories,
         min_codepoint=min_codepoint,
         max_codepoint=max_codepoint,
         include_characters=whitelist_characters,
         exclude_characters=blacklist_characters,
     )
     if not allowed:
         # Only arguments that were actually supplied appear in the message.
         supplied = [
             "%s=%r" % (name, value)
             for name, value in (
                 ("whitelist_categories", whitelist_categories),
                 ("blacklist_categories", blacklist_categories),
                 ("whitelist_characters", whitelist_characters),
                 ("blacklist_characters", blacklist_characters),
                 ("min_codepoint", min_codepoint),
                 ("max_codepoint", max_codepoint),
             )
             if value is not None
         ]
         raise InvalidArgument(
             "No characters are allowed to be generated by this "
             "combination of arguments: " + ", ".join(supplied)
         )

     self.intervals = IntervalSet(allowed)
     self.zero_point = self.intervals.index_above(ord("0"))
Пример #6
0
 def __init__(self, whitelist_categories=None, blacklist_categories=None,
              blacklist_characters=None, min_codepoint=None,
              max_codepoint=None, whitelist_characters=None):
     """Query the character map and store the resulting interval set.

     The two category arguments must be subsets of
     ``charmap.categories()`` (checked with ``assert``).  When
     ``charmap.query`` returns nothing, ``InvalidArgument`` is raised with
     a message naming each argument whose value is not ``None``.
     On success ``self.intervals`` and ``self.zero_point`` are set.
     """
     assert set(whitelist_categories or ()).issubset(charmap.categories())
     assert set(blacklist_categories or ()).issubset(charmap.categories())

     found = charmap.query(
         include_categories=whitelist_categories,
         exclude_categories=blacklist_categories,
         min_codepoint=min_codepoint,
         max_codepoint=max_codepoint,
         include_characters=whitelist_characters,
         exclude_characters=blacklist_characters,
     )

     if not found:
         described = []
         for pair in [
             ("whitelist_categories", whitelist_categories),
             ("blacklist_categories", blacklist_categories),
             ("whitelist_characters", whitelist_characters),
             ("blacklist_characters", blacklist_characters),
             ("min_codepoint", min_codepoint),
             ("max_codepoint", max_codepoint),
         ]:
             # Arguments left at None are omitted from the message.
             if pair[1] is not None:
                 described.append("%s=%r" % pair)
         raise InvalidArgument(
             "No characters are allowed to be generated by this "
             "combination of arguments: " + ", ".join(described)
         )

     self.intervals = IntervalSet(found)
     self.zero_point = self.intervals.index_above(ord("0"))
Пример #7
0
 def __init__(
     self,
     whitelist_categories=None,
     blacklist_categories=None,
     blacklist_characters=None,
     min_codepoint=None,
     max_codepoint=None,
     whitelist_characters=None,
 ):
     """Set ``self.intervals`` and ``self.zero_point`` from the constraints.

     Asserts that both category arguments are subsets of
     ``charmap.categories()``, then calls ``charmap.query``.

     Raises:
         InvalidArgument: when the query returns an empty result; the
             message enumerates the arguments that are not ``None``.
     """
     assert set(whitelist_categories or ()).issubset(charmap.categories())
     assert set(blacklist_categories or ()).issubset(charmap.categories())

     result = charmap.query(
         include_categories=whitelist_categories,
         exclude_categories=blacklist_categories,
         min_codepoint=min_codepoint,
         max_codepoint=max_codepoint,
         include_characters=whitelist_characters,
         exclude_characters=blacklist_characters,
     )
     if not result:
         named_values = (
             ('whitelist_categories', whitelist_categories),
             ('blacklist_categories', blacklist_categories),
             ('whitelist_characters', whitelist_characters),
             ('blacklist_characters', blacklist_characters),
             ('min_codepoint', min_codepoint),
             ('max_codepoint', max_codepoint),
         )
         details = ', '.join(
             '%s=%r' % (label, given)
             for label, given in named_values
             if given is not None
         )
         raise InvalidArgument(
             'No characters are allowed to be generated by this '
             'combination of arguments: ' + details
         )

     self.intervals = IntervalSet(result)
     self.zero_point = self.intervals.index_above(ord('0'))
Пример #8
0
 def __init__(
     self,
     whitelist_categories=None,
     blacklist_categories=None,
     blacklist_characters=None,
     min_codepoint=None,
     max_codepoint=None,
     whitelist_characters=None,
 ):
     """Initialise intervals plus the whitelist and blacklist sets.

     ``charmap.query`` receives the category, code-point and whitelist
     constraints (the blacklist is applied here, not in the query).  The
     blacklist is reduced to the characters whose code point is in
     ``self.intervals``.

     Raises:
         InvalidArgument: if the query result is empty, or if nothing is
             whitelisted and the retained blacklist is as large as
             ``self.intervals``.
     """
     candidates = charmap.query(
         include_categories=whitelist_categories,
         exclude_categories=blacklist_categories,
         min_codepoint=min_codepoint,
         max_codepoint=max_codepoint,
         include_characters=whitelist_characters,
     )
     if not candidates:
         raise InvalidArgument('No valid characters in set')
     self.intervals = IntervalSet(candidates)

     self.whitelist_characters = (
         set(whitelist_characters) if whitelist_characters else set()
     )

     if not blacklist_characters:
         self.blacklist_characters = set()
     else:
         self.blacklist_characters = {
             char for char in blacklist_characters
             if ord(char) in self.intervals
         }
         nothing_whitelisted = not self.whitelist_characters
         if (nothing_whitelisted and
                 len(self.blacklist_characters) == len(self.intervals)):
             raise InvalidArgument('No valid characters in set')

     self.zero_point = self.intervals.index_above(ord('0'))
Пример #9
0
def test_query_matches_categories_codepoints(exclude, include, m1, m2):
    """Check the code-point bounds of each range from ``cm.query``.

    The two bounds are put in ascending order first; each returned
    ``(start, end)`` pair must then satisfy ``low <= start`` and
    ``end <= high``.
    """
    ordered = sorted((m1, m2))
    low, high = ordered[0], ordered[1]
    found = cm.query(exclude, include, min_codepoint=low, max_codepoint=high)
    assert_valid_range_list(found)
    for first, last in found:
        assert low <= first
        assert last <= high
Пример #10
0
def test_query_matches_categories(exclude, include):
    """Sample each returned range and check the Unicode category.

    For both endpoints and the midpoint of every range from ``cm.query``,
    the character's category must be in ``include`` (when ``include`` is
    not ``None``) and must not be in ``exclude``.
    """
    ranges = cm.query(exclude, include)
    assert_valid_range_list(ranges)
    for low, high in ranges:
        midpoint = (low + high) // 2
        for codepoint in (low, high, midpoint):
            category = unicodedata.category(chr(codepoint))
            assert include is None or category in include
            assert category not in exclude
Пример #11
0
def test_query_matches_categories(exclude, include):
    """Verify categories at the ends and middle of each queried range.

    Each sampled code point is converted with ``hunichr`` and looked up
    via ``unicodedata.category``; the category has to respect both the
    ``include`` list (if given) and the ``exclude`` list.
    """
    result = cm.query(exclude, include)
    assert_valid_range_list(result)
    for start, end in result:
        samples = (start, end, (start + end) // 2)
        for point in samples:
            found_category = unicodedata.category(hunichr(point))
            if include is not None:
                assert found_category in include
            assert found_category not in exclude
Пример #12
0
 def __init__(self,
              whitelist_categories=None,
              blacklist_categories=None,
              blacklist_characters=None,
              min_codepoint=None,
              max_codepoint=None,
              whitelist_characters=None):
     """Initialise intervals, the argument repr and the index markers.

     After asserting that both category arguments are subsets of
     ``charmap.categories()``, the constraints are passed to
     ``charmap.query``.  ``self._arg_repr`` is a comma-separated
     ``name=value`` listing of the arguments, skipping values equal to
     ``None`` or ``""`` and a ``blacklist_categories`` equal to
     ``("Cs",)``.

     ``self.zero_point`` is ``index_above(ord("0"))`` and ``self.Z_point``
     is ``index_above(ord("Z"))`` capped at ``len(self.intervals) - 1``.

     Raises:
         InvalidArgument: if the query result is empty.
     """
     assert set(whitelist_categories or ()).issubset(charmap.categories())
     assert set(blacklist_categories or ()).issubset(charmap.categories())

     allowed = charmap.query(
         include_categories=whitelist_categories,
         exclude_categories=blacklist_categories,
         min_codepoint=min_codepoint,
         max_codepoint=max_codepoint,
         include_characters=whitelist_characters,
         exclude_characters=blacklist_characters,
     )

     shown = []
     for name, value in (
         ("whitelist_categories", whitelist_categories),
         ("blacklist_categories", blacklist_categories),
         ("whitelist_characters", whitelist_characters),
         ("blacklist_characters", blacklist_characters),
         ("min_codepoint", min_codepoint),
         ("max_codepoint", max_codepoint),
     ):
         if value in (None, ""):
             continue
         if name == "blacklist_categories" and value == ("Cs",):
             continue
         shown.append(f"{name}={value!r}")
     self._arg_repr = ", ".join(shown)

     if not allowed:
         raise InvalidArgument(
             "No characters are allowed to be generated by this "
             f"combination of arguments: {self._arg_repr}"
         )

     self.intervals = IntervalSet(allowed)
     self.zero_point = self.intervals.index_above(ord("0"))
     z_index = self.intervals.index_above(ord("Z"))
     last_index = len(self.intervals) - 1
     self.Z_point = min(z_index, last_index)
Пример #13
0
 def __init__(
     self,
     whitelist_categories=None,
     blacklist_categories=None,
     blacklist_characters=None,
     min_codepoint=None,
     max_codepoint=None,
     whitelist_characters=None,
 ):
     """Initialise intervals, the whitelist set and ``zero_point``.

     Both category arguments are asserted to be subsets of
     ``charmap.categories()`` before ``charmap.query`` is called with all
     constraints.  ``self.whitelist_characters`` becomes the set of
     whitelisted characters, or an empty set when none were given.

     Raises:
         InvalidArgument: if the query result is empty; the message lists
             each argument that is not ``None``.
     """
     for category_argument in (whitelist_categories, blacklist_categories):
         assert set(category_argument or ()).issubset(charmap.categories())

     permitted = charmap.query(
         include_categories=whitelist_categories,
         exclude_categories=blacklist_categories,
         min_codepoint=min_codepoint,
         max_codepoint=max_codepoint,
         include_characters=whitelist_characters,
         exclude_characters=blacklist_characters,
     )
     if not permitted:
         supplied = []
         for entry in (
             ('whitelist_categories', whitelist_categories),
             ('blacklist_categories', blacklist_categories),
             ('whitelist_characters', whitelist_characters),
             ('blacklist_characters', blacklist_characters),
             ('min_codepoint', min_codepoint),
             ('max_codepoint', max_codepoint),
         ):
             if entry[1] is not None:
                 supplied.append('%s=%r' % entry)
         raise InvalidArgument(
             'No characters are allowed to be generated by this '
             'combination of arguments: ' + ', '.join(supplied)
         )

     self.intervals = IntervalSet(permitted)
     self.whitelist_characters = (
         set(whitelist_characters) if whitelist_characters else set()
     )
     self.zero_point = self.intervals.index_above(ord('0'))
Пример #14
0
def test_exclude_characters_are_included_in_key():
    """A query excluding "0" must differ from the unconstrained query."""
    unrestricted = cm.query()
    without_zero = cm.query(exclude_characters="0")
    assert unrestricted != without_zero
Пример #15
0
def test_exclude_only_excludes_from_that_category(cat, i):
    """Excluding one category must not drop code points of other categories.

    The case is discarded via ``assume`` when code point ``i`` itself
    belongs to ``cat``; otherwise ``i`` has to fall inside one of the
    ranges returned by ``cm.query``.
    """
    assume(unicodedata.category(hunichr(i)) != cat)
    remaining = cm.query(exclude_categories=(cat,))
    covered = any(low <= i <= high for low, high in remaining)
    assert covered
Пример #16
0
def test_exclude_only_excludes_from_that_category(cat, i):
    """A code point outside ``cat`` survives excluding ``cat``.

    ``assume`` filters out inputs where ``chr(i)`` is in category ``cat``.
    For the rest, some range from ``cm.query`` must contain ``i``.
    """
    actual_category = unicodedata.category(chr(i))
    assume(actual_category != cat)
    ranges = cm.query(exclude_categories=(cat,))
    assert any(start <= i <= end for start, end in ranges)
Пример #17
0
def test_exclude_characters_are_included_in_key():
    """``exclude_characters`` has to influence the result of ``cm.query``."""
    baseline = cm.query()
    excluding_zero = cm.query(exclude_characters='0')
    assert baseline != excluding_zero