def __init__(self, whitelist_categories=None, blacklist_categories=None,
             blacklist_characters=None, min_codepoint=None,
             max_codepoint=None):
    """Initialise the permitted codepoint intervals and derived indices.

    The category and codepoint-bound arguments are forwarded to
    ``charmap.query``; its result is wrapped in an ``IntervalSet`` and
    stored as ``self.intervals``.  ``blacklist_characters`` is narrowed to
    the characters whose codepoint is actually in ``self.intervals``.

    Raises:
        InvalidArgument: if the query returns nothing, or if the narrowed
            blacklist has the same length as ``self.intervals``
            (presumably meaning every permitted character is excluded).
    """
    matching = charmap.query(
        include_categories=whitelist_categories,
        exclude_categories=blacklist_categories,
        min_codepoint=min_codepoint,
        max_codepoint=max_codepoint,
    )
    if not matching:
        raise InvalidArgument('No valid characters in set')
    self.intervals = IntervalSet(matching)

    if not blacklist_characters:
        self.blacklist_characters = set()
    else:
        # Only blacklist entries that fall inside the intervals are kept,
        # so the length comparison below is like-for-like.
        self.blacklist_characters = {
            char for char in blacklist_characters
            if ord(char) in self.intervals
        }
        if len(self.blacklist_characters) == len(self.intervals):
            raise InvalidArgument('No valid characters in set')

    self.zero_point = self.intervals.index_above(ord('0'))

    # Record the interval index of the newline character as a "special"
    # entry, unless it is blacklisted.
    self.special = []
    if '\n' not in self.blacklist_characters:
        try:
            newline_index = self.intervals.index(ord('\n'))
        except ValueError:
            # Best effort: presumably newline is simply not in the
            # intervals, so there is nothing to record.
            pass
        else:
            self.special.append(newline_index)
def __init__(self, whitelist_categories=None, blacklist_categories=None,
             blacklist_characters=None, min_codepoint=None,
             max_codepoint=None):
    """Build ``self.intervals`` from a charmap query and derive indices.

    Stores the queried intervals as an ``IntervalSet``, the subset of
    ``blacklist_characters`` that lies inside those intervals, the
    ``index_above`` position for ``'0'`` and, where available, the index
    of the newline character in ``self.special``.

    Raises:
        InvalidArgument: when the query result is empty, or when the
            in-range blacklist is as long as ``self.intervals``.
    """
    query_arguments = dict(
        include_categories=whitelist_categories,
        exclude_categories=blacklist_categories,
        min_codepoint=min_codepoint,
        max_codepoint=max_codepoint,
    )
    found = charmap.query(**query_arguments)
    if not found:
        raise InvalidArgument('No valid characters in set')
    self.intervals = IntervalSet(found)

    if blacklist_characters:
        in_range = set()
        for candidate in blacklist_characters:
            # Characters outside the intervals can never be produced, so
            # they are dropped from the blacklist.
            if ord(candidate) in self.intervals:
                in_range.add(candidate)
        self.blacklist_characters = in_range
        if len(self.blacklist_characters) == len(self.intervals):
            raise InvalidArgument('No valid characters in set')
    else:
        self.blacklist_characters = set()

    self.zero_point = self.intervals.index_above(ord('0'))
    self.special = []

    newline = '\n'
    if newline not in self.blacklist_characters:
        try:
            position = self.intervals.index(ord(newline))
        except ValueError:
            # Deliberately ignored: no index is recorded when the lookup
            # fails (presumably newline is not a permitted character).
            pass
        else:
            self.special.append(position)
def test_query_matches_categories_codepoints(exclude, include, m1, m2):
    """Every range returned by ``cm.query`` lies within the codepoint bounds.

    ``m1`` and ``m2`` may arrive in either order; they are sorted into a
    lower and an upper bound before the query is made.
    """
    lower, upper = sorted((m1, m2))
    ranges = cm.query(
        exclude, include, min_codepoint=lower, max_codepoint=upper
    )
    assert_valid_range_list(ranges)
    for start, end in ranges:
        assert lower <= start
        assert end <= upper
def __init__(self, whitelist_categories=None, blacklist_categories=None,
             blacklist_characters=None, min_codepoint=None,
             max_codepoint=None, whitelist_characters=None):
    """Initialise the permitted codepoint intervals from a charmap query.

    All filtering arguments, including the character whitelist and
    blacklist, are forwarded to ``charmap.query``.  The result is stored
    as an ``IntervalSet`` in ``self.intervals``; the whitelist is kept as
    a set in ``self.whitelist_characters`` (empty when none was given).

    Raises:
        InvalidArgument: if the query returns nothing.
    """
    matching = charmap.query(
        include_categories=whitelist_categories,
        exclude_categories=blacklist_categories,
        min_codepoint=min_codepoint,
        max_codepoint=max_codepoint,
        include_characters=whitelist_characters,
        exclude_characters=blacklist_characters,
    )
    if not matching:
        raise InvalidArgument('No valid characters in set')

    self.intervals = IntervalSet(matching)
    # A falsy whitelist (None or empty) becomes an empty set.
    self.whitelist_characters = set(whitelist_characters or ())
    self.zero_point = self.intervals.index_above(ord('0'))
def __init__(
    self,
    whitelist_categories=None,
    blacklist_categories=None,
    blacklist_characters=None,
    min_codepoint=None,
    max_codepoint=None,
    whitelist_characters=None,
):
    """Initialise the permitted codepoint intervals from a charmap query.

    Both category arguments are asserted to be subsets of
    ``charmap.categories()`` before every filtering argument is forwarded
    to ``charmap.query``.  The result is stored as an ``IntervalSet`` in
    ``self.intervals``, and ``self.zero_point`` holds the ``index_above``
    position for ``"0"``.

    Raises:
        InvalidArgument: if the query returns nothing; the message lists
            every argument that was not ``None``.
    """
    # Internal sanity check; both category arguments get the same test.
    for categories in (whitelist_categories, blacklist_categories):
        assert set(categories or ()).issubset(charmap.categories())

    matching = charmap.query(
        include_categories=whitelist_categories,
        exclude_categories=blacklist_categories,
        min_codepoint=min_codepoint,
        max_codepoint=max_codepoint,
        include_characters=whitelist_characters,
        exclude_characters=blacklist_characters,
    )
    if not matching:
        # Only mention the arguments the caller actually supplied.
        supplied = [
            "%s=%r" % (name, value)
            for name, value in (
                ("whitelist_categories", whitelist_categories),
                ("blacklist_categories", blacklist_categories),
                ("whitelist_characters", whitelist_characters),
                ("blacklist_characters", blacklist_characters),
                ("min_codepoint", min_codepoint),
                ("max_codepoint", max_codepoint),
            )
            if value is not None
        ]
        raise InvalidArgument(
            "No characters are allowed to be generated by this "
            "combination of arguments: " + ", ".join(supplied)
        )

    self.intervals = IntervalSet(matching)
    self.zero_point = self.intervals.index_above(ord("0"))
def __init__(
    self,
    whitelist_categories=None,
    blacklist_categories=None,
    blacklist_characters=None,
    min_codepoint=None,
    max_codepoint=None,
    whitelist_characters=None,
):
    """Query charmap for the allowed intervals and store them on ``self``.

    Sets ``self.intervals`` (an ``IntervalSet`` built from the query
    result) and ``self.zero_point`` (``index_above`` for ``"0"``).

    Raises:
        InvalidArgument: if no intervals match; the message names each
            argument whose value is not ``None``.
    """
    # Internal sanity checks on the category arguments.
    assert set(whitelist_categories or ()).issubset(charmap.categories())
    assert set(blacklist_categories or ()).issubset(charmap.categories())

    allowed = charmap.query(
        include_categories=whitelist_categories,
        exclude_categories=blacklist_categories,
        min_codepoint=min_codepoint,
        max_codepoint=max_codepoint,
        include_characters=whitelist_characters,
        exclude_characters=blacklist_characters,
    )

    if allowed:
        self.intervals = IntervalSet(allowed)
        self.zero_point = self.intervals.index_above(ord("0"))
        return

    # Nothing can be generated: report the arguments that were supplied.
    described = []
    for label, setting in (
        ("whitelist_categories", whitelist_categories),
        ("blacklist_categories", blacklist_categories),
        ("whitelist_characters", whitelist_characters),
        ("blacklist_characters", blacklist_characters),
        ("min_codepoint", min_codepoint),
        ("max_codepoint", max_codepoint),
    ):
        if setting is not None:
            described.append("%s=%r" % (label, setting))
    raise InvalidArgument(
        "No characters are allowed to be generated by this "
        "combination of arguments: " + ", ".join(described)
    )
def __init__(self, whitelist_categories=None, blacklist_categories=None,
             blacklist_characters=None, min_codepoint=None,
             max_codepoint=None, whitelist_characters=None):
    """Initialise the permitted codepoint intervals from a charmap query.

    The category arguments are asserted to be subsets of
    ``charmap.categories()``.  All filters are then forwarded to
    ``charmap.query`` and the result is stored as an ``IntervalSet`` in
    ``self.intervals``.  ``self.zero_point`` is the ``index_above``
    position for ``'0'``.

    Raises:
        InvalidArgument: if the query returns nothing; the message lists
            every argument that was not ``None``.
    """
    assert set(whitelist_categories or ()).issubset(charmap.categories())
    assert set(blacklist_categories or ()).issubset(charmap.categories())

    matching = charmap.query(
        include_categories=whitelist_categories,
        exclude_categories=blacklist_categories,
        min_codepoint=min_codepoint,
        max_codepoint=max_codepoint,
        include_characters=whitelist_characters,
        exclude_characters=blacklist_characters,
    )
    if not matching:
        named_values = (
            ('whitelist_categories', whitelist_categories),
            ('blacklist_categories', blacklist_categories),
            ('whitelist_characters', whitelist_characters),
            ('blacklist_characters', blacklist_characters),
            ('min_codepoint', min_codepoint),
            ('max_codepoint', max_codepoint),
        )
        # Arguments left at None are omitted from the message.
        summary = ', '.join(
            '%s=%r' % (name, value)
            for name, value in named_values
            if value is not None
        )
        raise InvalidArgument(
            'No characters are allowed to be generated by this '
            'combination of arguments: ' + summary
        )

    self.intervals = IntervalSet(matching)
    self.zero_point = self.intervals.index_above(ord('0'))
def __init__(self, whitelist_categories=None, blacklist_categories=None,
             blacklist_characters=None, min_codepoint=None,
             max_codepoint=None, whitelist_characters=None):
    """Initialise intervals plus whitelist and blacklist character sets.

    The categories, codepoint bounds and character whitelist are passed
    to ``charmap.query``; the blacklist is not, and is instead narrowed
    here to the characters whose codepoint is in ``self.intervals``.

    Raises:
        InvalidArgument: if the query returns nothing, or if there is no
            whitelist and the narrowed blacklist is as long as
            ``self.intervals``.
    """
    matching = charmap.query(
        include_categories=whitelist_categories,
        exclude_categories=blacklist_categories,
        min_codepoint=min_codepoint,
        max_codepoint=max_codepoint,
        include_characters=whitelist_characters,
    )
    if not matching:
        raise InvalidArgument('No valid characters in set')
    self.intervals = IntervalSet(matching)

    # A falsy whitelist (None or empty) becomes an empty set.
    self.whitelist_characters = set(whitelist_characters or ())

    if not blacklist_characters:
        self.blacklist_characters = set()
    else:
        self.blacklist_characters = {
            char for char in blacklist_characters
            if ord(char) in self.intervals
        }
        # The length comparison is only evaluated when the whitelist is
        # empty.
        if (not self.whitelist_characters and
                len(self.blacklist_characters) == len(self.intervals)):
            raise InvalidArgument('No valid characters in set')

    self.zero_point = self.intervals.index_above(ord('0'))
def test_query_matches_categories(exclude, include):
    """Sampled codepoints of each returned range respect the categories.

    For every range from ``cm.query`` the two endpoints and the midpoint
    are checked: the Unicode category must be in ``include`` (when given)
    and must not be in ``exclude``.
    """
    ranges = cm.query(exclude, include)
    assert_valid_range_list(ranges)
    for start, end in ranges:
        midpoint = (start + end) // 2
        for codepoint in (start, end, midpoint):
            category = unicodedata.category(chr(codepoint))
            assert include is None or category in include
            assert category not in exclude
def test_query_matches_categories(exclude, include):
    """Sampled codepoints of each returned range respect the categories.

    The endpoints and midpoint of every range from ``cm.query`` must have
    a Unicode category that is in ``include`` (when it is not ``None``)
    and is not in ``exclude``.
    """
    ranges = cm.query(exclude, include)
    assert_valid_range_list(ranges)
    for low, high in ranges:
        samples = (low, high, (low + high) // 2)
        for codepoint in samples:
            category = unicodedata.category(hunichr(codepoint))
            assert include is None or category in include
            assert category not in exclude
def __init__(
    self,
    whitelist_categories=None,
    blacklist_categories=None,
    blacklist_characters=None,
    min_codepoint=None,
    max_codepoint=None,
    whitelist_characters=None,
):
    """Initialise the permitted codepoint intervals and derived indices.

    Sets ``self._arg_repr`` (a comma-separated ``name=value`` summary of
    the arguments), ``self.intervals`` (an ``IntervalSet`` built from the
    ``charmap.query`` result), ``self.zero_point`` and ``self.Z_point``.

    Raises:
        InvalidArgument: if the query returns nothing; the message
            includes ``self._arg_repr``.
    """
    # Internal sanity check; both category arguments get the same test.
    for categories in (whitelist_categories, blacklist_categories):
        assert set(categories or ()).issubset(charmap.categories())

    matching = charmap.query(
        include_categories=whitelist_categories,
        exclude_categories=blacklist_categories,
        min_codepoint=min_codepoint,
        max_codepoint=max_codepoint,
        include_characters=whitelist_characters,
        exclude_characters=blacklist_characters,
    )

    shown = []
    for key, value in [
        ("whitelist_categories", whitelist_categories),
        ("blacklist_categories", blacklist_categories),
        ("whitelist_characters", whitelist_characters),
        ("blacklist_characters", blacklist_characters),
        ("min_codepoint", min_codepoint),
        ("max_codepoint", max_codepoint),
    ]:
        # Unset and empty-string arguments are left out of the summary.
        if value in (None, ""):
            continue
        # A blacklist of exactly ("Cs",) is also omitted; presumably this
        # is the default supplied by the caller. TODO confirm.
        if key == "blacklist_categories" and value == ("Cs",):
            continue
        shown.append(f"{key}={value!r}")
    self._arg_repr = ", ".join(shown)

    if not matching:
        raise InvalidArgument(
            "No characters are allowed to be generated by this "
            f"combination of arguments: {self._arg_repr}"
        )

    self.intervals = IntervalSet(matching)
    self.zero_point = self.intervals.index_above(ord("0"))
    # Z_point is capped at the last valid index of the interval set.
    above_Z = self.intervals.index_above(ord("Z"))
    last_index = len(self.intervals) - 1
    self.Z_point = min(above_Z, last_index)
def __init__(self, whitelist_categories=None, blacklist_categories=None,
             blacklist_characters=None, min_codepoint=None,
             max_codepoint=None, whitelist_characters=None):
    """Initialise intervals and the whitelist set from a charmap query.

    The category arguments are asserted to be subsets of
    ``charmap.categories()``.  All filters are forwarded to
    ``charmap.query`` and the result is stored as an ``IntervalSet`` in
    ``self.intervals``.  The whitelist is kept as a set in
    ``self.whitelist_characters`` (empty when none was given).

    Raises:
        InvalidArgument: if the query returns nothing; the message lists
            every argument that was not ``None``.
    """
    assert set(whitelist_categories or ()).issubset(charmap.categories())
    assert set(blacklist_categories or ()).issubset(charmap.categories())

    matching = charmap.query(
        include_categories=whitelist_categories,
        exclude_categories=blacklist_categories,
        min_codepoint=min_codepoint,
        max_codepoint=max_codepoint,
        include_characters=whitelist_characters,
        exclude_characters=blacklist_characters,
    )
    if not matching:
        # Only mention the arguments the caller actually supplied.
        supplied = []
        for name, value in (
            ('whitelist_categories', whitelist_categories),
            ('blacklist_categories', blacklist_categories),
            ('whitelist_characters', whitelist_characters),
            ('blacklist_characters', blacklist_characters),
            ('min_codepoint', min_codepoint),
            ('max_codepoint', max_codepoint),
        ):
            if value is not None:
                supplied.append('%s=%r' % (name, value))
        raise InvalidArgument(
            'No characters are allowed to be generated by this '
            'combination of arguments: ' + ', '.join(supplied)
        )

    self.intervals = IntervalSet(matching)
    # A falsy whitelist (None or empty) becomes an empty set.
    self.whitelist_characters = set(whitelist_characters or ())
    self.zero_point = self.intervals.index_above(ord('0'))
def test_exclude_characters_are_included_in_key():
    """A query excluding ``"0"`` must differ from an unrestricted query.

    Going by the test name, this presumably guards against a lookup key
    that ignores ``exclude_characters``.
    """
    unrestricted = cm.query()
    without_zero = cm.query(exclude_characters="0")
    assert unrestricted != without_zero
def test_exclude_only_excludes_from_that_category(cat, i):
    """Excluding one category keeps codepoints of every other category.

    The example is discarded via ``assume`` when codepoint ``i`` itself
    belongs to ``cat``; otherwise ``i`` must fall inside one of the
    ranges returned by ``cm.query``.
    """
    assume(unicodedata.category(hunichr(i)) != cat)
    remaining = cm.query(exclude_categories=(cat,))
    assert any(low <= i <= high for low, high in remaining)
def test_exclude_only_excludes_from_that_category(cat, i):
    """Excluding one category keeps codepoints of every other category.

    Examples where codepoint ``i`` is itself in ``cat`` are rejected with
    ``assume``; for the rest, ``i`` must be covered by some range
    returned by ``cm.query``.
    """
    own_category = unicodedata.category(chr(i))
    assume(own_category != cat)
    remaining = cm.query(exclude_categories=(cat,))
    assert any(start <= i <= end for start, end in remaining)
def test_exclude_characters_are_included_in_key():
    """A query excluding ``'0'`` must differ from an unrestricted query.

    Going by the test name, this presumably guards against a lookup key
    that ignores ``exclude_characters``.
    """
    everything = cm.query()
    all_but_zero = cm.query(exclude_characters='0')
    assert everything != all_but_zero