Exemplo n.º 1
0
    def __init__(
        self,
        whitelist_categories=None,
        blacklist_categories=None,
        blacklist_characters=None,
        min_codepoint=None,
        max_codepoint=None,
    ):
        """Build the filtered character trees this object draws from.

        Three trees are stored on the instance: ``ascii_tree`` and
        ``unicode_tree`` (the charstree ASCII / Unicode trees run through
        ``charstree.filter_tree`` with the given constraints) and
        ``spaces_tree`` (``unicode_tree`` filtered again with the ``Zs`` and
        ``Cc`` categories whitelisted).

        :param whitelist_categories: iterable of category names forwarded to
            ``charstree.filter_tree``; ``None`` or empty becomes an empty set.
        :param blacklist_categories: iterable of category names forwarded to
            ``charstree.filter_tree``; ``None`` or empty becomes an empty set.
        :param blacklist_characters: iterable of characters forwarded to
            ``charstree.filter_tree``; ``None`` or empty becomes an empty set.
        :param min_codepoint: lower codepoint bound; defaults to 0.
        :param max_codepoint: upper codepoint bound; defaults to
            ``sys.maxunicode`` when ``None``.
        :raises InvalidArgument: if the filtered Unicode tree is empty.
        """
        whitelist_categories = set(whitelist_categories or [])
        blacklist_categories = set(blacklist_categories or [])
        blacklist_characters = set(blacklist_characters or [])

        # ``or 0`` is harmless here: the only falsy bound (0) is the default.
        min_codepoint = int(min_codepoint or 0)
        # Compare against None explicitly. ``max_codepoint or sys.maxunicode``
        # would silently widen an explicit upper bound of 0 to "no limit".
        # NOTE(review): confirm charstree.filter_tree itself treats
        # max_codepoint=0 as a real bound rather than as "unset".
        if max_codepoint is None:
            max_codepoint = sys.maxunicode
        else:
            max_codepoint = int(max_codepoint)

        self.ascii_tree = charstree.filter_tree(
            charstree.ascii_tree(),
            whitelist_categories,
            blacklist_categories,
            blacklist_characters,
            min_codepoint,
            max_codepoint,
        )
        self.unicode_tree = charstree.filter_tree(
            charstree.unicode_tree(),
            whitelist_categories,
            blacklist_categories,
            blacklist_characters,
            min_codepoint,
            max_codepoint,
        )
        # The spaces tree is derived from the already-filtered Unicode tree;
        # the category blacklist is not passed again for this second pass.
        self.spaces_tree = charstree.filter_tree(
            self.unicode_tree,
            whitelist_categories={"Zs", "Cc"},
            blacklist_characters=blacklist_characters,
            min_codepoint=min_codepoint,
            max_codepoint=max_codepoint,
        )
        self.blacklist_characters = blacklist_characters
        self.min_codepoint = min_codepoint
        self.max_codepoint = max_codepoint
        if not self.unicode_tree:
            raise InvalidArgument(
                "No characters could be produced."
                " Try to reduce white/black categories list"
                " or min/max allowed code points."
            )
Exemplo n.º 2
0
def test_filter_tree():
    """Filtering the ASCII tree to '0'..'9' yields exactly those codepoints, in order."""
    lowest, highest = ord('0'), ord('9')
    digits_only = charstree.filter_tree(
        charstree.ascii_tree(),
        min_codepoint=lowest,
        max_codepoint=highest,
    )
    # Both bounds are expected to be included in the filtered tree.
    assert list(range(lowest, highest + 1)) == list(
        charstree.codepoints(digits_only))
    def __init__(self,
                 whitelist_categories=None,
                 blacklist_categories=None,
                 blacklist_characters=None,
                 min_codepoint=None,
                 max_codepoint=None):
        """Build the filtered character trees this object draws from.

        Stores ``ascii_tree`` and ``unicode_tree`` (the charstree ASCII and
        Unicode trees passed through ``charstree.filter_tree`` with the given
        constraints) and ``spaces_tree`` (``unicode_tree`` filtered again with
        the ``Zs`` and ``Cc`` categories whitelisted), plus the normalised
        ``blacklist_characters``, ``min_codepoint`` and ``max_codepoint``.

        :param whitelist_categories: iterable of category names forwarded to
            ``charstree.filter_tree``; ``None`` or empty becomes an empty set.
        :param blacklist_categories: iterable of category names forwarded to
            ``charstree.filter_tree``; ``None`` or empty becomes an empty set.
        :param blacklist_characters: iterable of characters forwarded to
            ``charstree.filter_tree``; ``None`` or empty becomes an empty set.
        :param min_codepoint: lower codepoint bound; defaults to 0.
        :param max_codepoint: upper codepoint bound; defaults to
            ``sys.maxunicode`` when ``None``.
        :raises InvalidArgument: if the filtered Unicode tree is empty.
        """
        whitelist_categories = set(whitelist_categories or [])
        blacklist_categories = set(blacklist_categories or [])
        blacklist_characters = set(blacklist_characters or [])

        # ``or 0`` is harmless here: the only falsy bound (0) is the default.
        min_codepoint = int(min_codepoint or 0)
        # An explicit ``max_codepoint=0`` is falsy, so the previous
        # ``max_codepoint or sys.maxunicode`` discarded it. Only None means
        # "no upper bound given".
        # NOTE(review): confirm charstree.filter_tree itself treats
        # max_codepoint=0 as a real bound rather than as "unset".
        if max_codepoint is None:
            max_codepoint = sys.maxunicode
        else:
            max_codepoint = int(max_codepoint)

        self.ascii_tree = charstree.filter_tree(
            charstree.ascii_tree(),
            whitelist_categories,
            blacklist_categories,
            blacklist_characters,
            min_codepoint,
            max_codepoint,
        )
        self.unicode_tree = charstree.filter_tree(
            charstree.unicode_tree(),
            whitelist_categories,
            blacklist_categories,
            blacklist_characters,
            min_codepoint,
            max_codepoint,
        )
        # Derived from the already-filtered Unicode tree; the category
        # blacklist is not passed again for this second pass.
        self.spaces_tree = charstree.filter_tree(
            self.unicode_tree,
            whitelist_categories={'Zs', 'Cc'},
            blacklist_characters=blacklist_characters,
            min_codepoint=min_codepoint,
            max_codepoint=max_codepoint,
        )
        self.blacklist_characters = blacklist_characters
        self.min_codepoint = min_codepoint
        self.max_codepoint = max_codepoint
        if not self.unicode_tree:
            raise InvalidArgument('No characters could be produced.'
                                  ' Try to reduce white/black categories list'
                                  ' or min/max allowed code points.')
Exemplo n.º 4
0
    def try_ascii(self, random, template):
        """Yield ASCII-tree characters bounded by ``zero_point`` and *template*.

        The ASCII tree is filtered to the codepoint range spanned by
        ``self.zero_point`` and ``ord(template)``, and every codepoint of the
        resulting subtree is yielded as a character. Nothing is yielded when
        the ASCII tree is empty or when *template* already sits at the zero
        point. ``random`` is accepted but not used here.
        """
        ascii_chars = self.ascii_tree
        if not ascii_chars:
            return

        pivot = self.zero_point
        target = ord(template)
        if target == pivot:
            # Template is already the zero point: no range to explore.
            return

        # Order the two distinct bounds so the smaller one is the minimum.
        lower, upper = sorted((target, pivot))
        window = charstree.filter_tree(
            ascii_chars, min_codepoint=lower, max_codepoint=upper
        )

        for point in charstree.codepoints(window):
            yield hunichr(point)
    def try_ascii(self, random, template):
        """Yield ASCII-tree characters bounded by ``zero_point`` and *template*.

        Filters ``self.ascii_tree`` to the codepoint range spanned by
        ``self.zero_point`` and ``ord(template)`` and yields each codepoint of
        that subtree as a character. Yields nothing if the ASCII tree is empty
        or if *template* equals the zero point. ``random`` is accepted but not
        used here.
        """
        source_tree = self.ascii_tree
        if not source_tree:
            return

        origin = self.zero_point
        wanted = ord(template)
        if wanted == origin:
            # Template is already the zero point: no range to explore.
            return

        # The two bounds differ here, so min/max picks them unambiguously.
        bounds = {
            'min_codepoint': min(wanted, origin),
            'max_codepoint': max(wanted, origin),
        }
        narrowed = charstree.filter_tree(source_tree, **bounds)

        for value in charstree.codepoints(narrowed):
            yield hunichr(value)