Exemplo n.º 1
0
def _stringprep(s, check_unassigned, mapping, normalization, prohibited, bidi):
    """Run the stages of an RFC 3454 stringprep profile over ``s``.

    Every stage is optional and is skipped when its controlling argument
    is falsy. The stages run in this order:

    1. ``check_unassigned`` -- reject any character found in table A.1.
    2. ``mapping`` -- a callable applied to the whole string.
    3. ``normalization`` -- a form name handed to ``unicodedata.normalize``.
    4. ``prohibited`` -- an iterable of per-character predicates; a truthy
       result from any of them rejects the string.
    5. ``bidi`` -- delegate the bidirectional check to ``_check_bidi``.

    Returns the prepared string. Raises ``TypeError`` when ``s`` is not a
    ``str`` and ``SASLPrepError`` for unassigned or prohibited characters.
    """
    if not isinstance(s, str):
        raise TypeError('argument 0 must be str, not %s' % type(s).__name__)

    if check_unassigned:
        # s is a str here, so None can safely stand for "nothing found".
        unassigned = next((c for c in s if stringprep.in_table_a1(c)), None)
        if unassigned is not None:
            raise SASLPrepError('Unassigned character: %r' % unassigned)

    prepared = s
    if mapping:
        prepared = mapping(prepared)
    if normalization:
        prepared = unicodedata.normalize(normalization, prepared)

    if prohibited:
        for c in prepared:
            if any(lookup(c) for lookup in prohibited):
                raise SASLPrepError('Prohibited character: %r' % c)

    if bidi:
        _check_bidi(prepared)

    return prepared
Exemplo n.º 2
0
def _stringprep(s, check_unassigned, mapping, normalization, prohibited, bidi):
    """Apply a stringprep (RFC 3454) profile described by the arguments.

    ``s`` must be a ``str``; anything else raises ``TypeError``. Each of
    the remaining arguments switches one processing step on when truthy:

    * ``check_unassigned``: fail on characters listed in table A.1.
    * ``mapping``: callable that receives and returns the whole string.
    * ``normalization``: form name passed to ``unicodedata.normalize``.
    * ``prohibited``: iterable of predicates tried on every character.
    * ``bidi``: run ``_check_bidi`` on the result.

    Unassigned and prohibited characters raise ``SASLPrepError``. The
    processed string is returned.
    """
    if not isinstance(s, str):
        raise TypeError('argument 0 must be str, not %s' % type(s).__name__)

    if check_unassigned:  # pragma: no branch
        # Characters of a str are never None, so None marks "no hit".
        first_unassigned = next(
            (char for char in s if stringprep.in_table_a1(char)), None)
        if first_unassigned is not None:
            raise SASLPrepError('Unassigned character: %r' % first_unassigned)

    result = s

    if mapping:  # pragma: no branch
        result = mapping(result)

    if normalization:  # pragma: no branch
        result = unicodedata.normalize(normalization, result)

    if prohibited:  # pragma: no branch
        for char in result:
            if any(is_prohibited(char) for is_prohibited in prohibited):
                raise SASLPrepError('Prohibited character: %r' % char)

    if bidi:  # pragma: no branch
        _check_bidi(result)

    return result
Exemplo n.º 3
0
def saslprep(s, allow_unassigned=False):
    '''
    Prepare the Unicode string s with the SASLprep profile (RFC 4013,
    "Stringprep Profile for User Names and Passwords").

    The string goes through five phases: mapping, NFKC normalization,
    the prohibited-output check, the bidirectional check and finally the
    unassigned code point check. Passing allow_unassigned=True skips the
    last phase; that is meant for queries and other non-storing uses.

    The prepared Unicode string is returned. A non-Unicode argument
    raises TypeError; any disallowed content raises ValueError.

    Iteration goes through the unichars() helper, which is defined
    elsewhere (presumably it yields the code points of s -- confirm).
    '''
    if type(s) != type(u''):
        raise TypeError("input must be a Unicode string")

    # phase 1: mapping -- drop table B.1 characters, turn C.1.2 spaces
    # into a plain space
    mapped = []
    for ch in unichars(s):
        if stringprep.in_table_b1(ch):
            continue
        mapped.append(u' ' if stringprep.in_table_c12(ch) else ch)
    s = u''.join(mapped)

    # phase 2: normalization
    s = unicodedata.normalize('NFKC', s)

    # phase 3: prohibition -- tables are tried in this order per character
    prohibitions = (
        (stringprep.in_table_c12, "prohibited non-ASCII space character"),
        (stringprep.in_table_c21, "prohibited ASCII control character"),
        (stringprep.in_table_c22, "prohibited non-ASCII control character"),
        (stringprep.in_table_c3, "prohibited private use character"),
        (stringprep.in_table_c4, "prohibited non-character code point"),
        (stringprep.in_table_c5, "prohibited surrogate code point"),
        (stringprep.in_table_c6,
         "prohibited character inappropriate for plain text"),
        (stringprep.in_table_c7,
         "prohibited character inappropriate for canonical representation"),
        (stringprep.in_table_c8,
         "prohibited character changing display properties, or a deprecated character"),
        (stringprep.in_table_c9, "prohibited tagging character"),
    )
    for ch in unichars(s):
        for is_prohibited, reason in prohibitions:
            if is_prohibited(ch):
                raise ValueError(reason)

    # phase 4: bidi check -- classify each character as right-to-left
    # ('r', table D.1), left-to-right ('l', table D.2) or neutral ('x')
    directions = []
    for ch in unichars(s):
        if stringprep.in_table_d1(ch):
            directions.append('r')
        elif stringprep.in_table_d2(ch):
            directions.append('l')
        else:
            directions.append('x')
    if 'r' in directions:
        if 'l' in directions:
            raise ValueError("prohibited mixture of strong left-to-right and right-to-left text")
        if directions[0] != 'r' or directions[-1] != 'r':
            raise ValueError("string containing right-to-left text must start and end with right-to-left text")

    # phase 5: unassigned check
    if not allow_unassigned:
        if any(stringprep.in_table_a1(ch) for ch in unichars(s)):
            raise ValueError("prohibited unassigned code point")

    return s
Exemplo n.º 4
0
def nodeprep(foo, errors='strict'):
    """Prepare the node part of an address with a Nodeprep-style profile.

    Steps, in order:

    1. ``None`` is passed straight through; byte strings are decoded as
       UTF-8.
    2. Mapping: characters in table B.1 are removed and the rest are
       folded through ``stringprep.map_table_b2``.
    3. The result is normalized with NFKC.
    4. Every character is checked against table A.1 (only when
       ``errors == 'strict'``), tables C.1.1 through C.9 and the extra
       banned characters ``" & ' / : < > @``.
    5. Bidirectional rules: a string with a RandALCat character (table
       D.1) must start and end with one and must not contain any LCat
       character (table D.2).

    :param foo: text (or UTF-8 bytes) to prepare, or ``None``.
    :param errors: ``'strict'`` rejects unassigned code points; any other
        value lets them through.
    :return: the prepared text, or ``None`` when ``foo`` is ``None``.
    :raises UnicodeError: when any of the checks above fails.
    """
    if foo is None:
        return None
    # ``bytes`` is ``str`` on Python 2, so this decodes exactly what the
    # old ``isinstance(foo, str)`` test did there, and on Python 3 it no
    # longer calls the non-existent ``str.decode``.
    if isinstance(foo, bytes):
        foo = foo.decode('utf-8')

    mapped = u''.join(
        stringprep.map_table_b2(ch)
        for ch in foo
        if not stringprep.in_table_b1(ch)
    )
    foo = unicodedata.normalize('NFKC', mapped)

    first_is_randal = False
    if foo:
        first_is_randal = stringprep.in_table_d1(foo[0])
        if first_is_randal and not stringprep.in_table_d1(foo[-1]):
            raise UnicodeError("Section 6.3 [end]")

    # Checked per character, in this order.
    prohibited_tables = (
        (stringprep.in_table_c11, "In table C.1.1"),
        (stringprep.in_table_c12, "In table C.1.2"),
        (stringprep.in_table_c21, "In table C.2.1"),
        (stringprep.in_table_c22, "In table C.2.2"),
        (stringprep.in_table_c3, "In table C.3"),
        (stringprep.in_table_c4, "In table C.4"),
        (stringprep.in_table_c5, "In table C.5"),
        (stringprep.in_table_c6, "In table C.6"),
        (stringprep.in_table_c7, "In table C.7"),
        (stringprep.in_table_c8, "In table C.8"),
        (stringprep.in_table_c9, "In table C.9"),
    )

    for index, ch in enumerate(foo):
        if errors == 'strict' and stringprep.in_table_a1(ch):
            raise UnicodeError("Unassigned Codepoint")
        for in_table, message in prohibited_tables:
            if in_table(ch):
                raise UnicodeError(message)
        # The Nodeprep profile bans ':' (U+003A); the previous list had
        # ';' in its place, which let ':' through and rejected ';'.
        if ch in u"\"&'/:<>@":
            raise UnicodeError("In nodeprep banned list")
        if index:
            if first_is_randal and stringprep.in_table_d2(ch):
                raise UnicodeError("Section 6.2")
            # No exemption for the final character: when the string does
            # not start with a RandALCat character it may not contain one
            # anywhere, including at the end.
            if not first_is_randal and stringprep.in_table_d1(ch):
                raise UnicodeError("Section 6.3")
    return foo
Exemplo n.º 5
0
    def is_valid_rfc3491(domainname):
        """
        Tell whether a domain name is free of unassigned code points.

        Each character is looked up in stringprep table A.1 (the code
        points left unassigned by Unicode 3.2); a single hit makes the
        name invalid.

        :param domainname: The unicode string of the domain name.
        :return: True if no character is in table A.1, False otherwise.
        """
        return not any(stringprep.in_table_a1(char) for char in domainname)
Exemplo n.º 6
0
    def is_valid_rfc3491(domainname):
        """
        Report whether every character of the domain name is assigned.

        The scan stops at the first character found in stringprep table
        A.1 (code points unassigned in Unicode 3.2).

        :param domainname: The unicode string of the domain name.
        :return: False as soon as a table A.1 character is met, True when
            the whole name has been scanned without finding one.
        """
        for code_point in domainname:
            if stringprep.in_table_a1(code_point):
                return False
        return True
Exemplo n.º 7
0
def canonicaliseUsername(username, ignoreSpaces = False, throws = True):
    """Canonicalise a user name with stringprep-style processing.

    Steps, in order:

    1. Characters in table A.1 (unassigned) are rejected, or silently
       dropped when ``throws`` is false. Characters in table B.1 are
       dropped; everything else is folded via ``map_table_b2``.
    2. The result is normalized with NFKC, using the ``unicodedata``
       object exposed by the ``stringprep`` module.
    3. Characters in tables C.1 (unless ``ignoreSpaces``), C.2 through
       C.9, or whose category is one of Ps, Pe, Pi, Pf, Po are rejected,
       or dropped when ``throws`` is false.
    4. When ``throws`` is true the bidirectional rules are enforced: a
       name containing a RandALCat character (table D.1) must not contain
       an LCat character (table D.2) and must start and end with a
       RandALCat character.

    :param username: the Unicode user name to canonicalise.
    :param ignoreSpaces: when true, table C.1 space characters are kept.
    :param throws: when true, offending input raises ``ValueError``;
        when false, offending characters are removed instead and the
        bidirectional check is skipped.
    :return: the canonicalised Unicode string.
    :raises ValueError: on offending input when ``throws`` is true.
    """
    # Read stringprep documentation for the meaning of the tables

    mapped = []
    for char in username:
        if stringprep.in_table_a1(char):
            if throws:
                raise ValueError("unassigned code point in username")
            continue
        if stringprep.in_table_b1(char):
            continue
        mapped.append(stringprep.map_table_b2(char))

    normalized = stringprep.unicodedata.normalize("NFKC", u"".join(mapped))

    punctuation = ("Ps", "Pe", "Pi", "Pf", "Po")
    kept = []
    for char in normalized:
        prohibited = (
            (not ignoreSpaces and stringprep.in_table_c11_c12(char)) or
            stringprep.in_table_c21_c22(char) or
            stringprep.in_table_c3(char) or
            stringprep.in_table_c4(char) or
            stringprep.in_table_c5(char) or
            stringprep.in_table_c6(char) or
            stringprep.in_table_c7(char) or
            stringprep.in_table_c8(char) or
            stringprep.in_table_c9(char) or
            stringprep.unicodedata.category(char) in punctuation
        )
        if not prohibited:
            kept.append(char)
        elif throws:
            raise ValueError("prohibited character in username")

    result = u"".join(kept)

    if throws:
        # Built as a real list: on Python 3 ``map``/``filter`` return lazy
        # objects that are always truthy and cannot be indexed, which made
        # the old check reject every name with a right-to-left character.
        randal = [stringprep.in_table_d1(char) for char in result]
        if any(randal):
            if any(stringprep.in_table_d2(char) for char in result):
                raise ValueError("mixed left-to-right and right-to-left text")
            if not randal[0] or not randal[-1]:
                raise ValueError(
                    "right-to-left text must start and end the username")

    return result
Exemplo n.º 8
0
def saslprep(text, strict=True):
    """
    Return a processed version of the given string, using the SASLPrep
    profile of stringprep.

    The string is mapped (table C.1.2 characters become a space, table
    B.1 characters are removed), normalized with NFKC, and then checked
    for prohibited output and for the bidirectional rules: a string with
    a RandALCat character (table D.1) must start and end with one and
    must not contain any LCat character (table D.2).

    :param text: The string to process, in UTF-8.
    :param strict: If ``True``, prevent the use of unassigned code points.
    :return: The prepared string.
    :raises SASLPrepFailure: If the string contains an unassigned (when
        ``strict``) or prohibited character, or breaks the bidirectional
        rules.
    """

    # Byte strings only need decoding on Python 2.
    if sys.version_info < (3, 0) and isinstance(text, str):
        text = text.decode('utf-8')

    # Mapping:
    #
    #  -  non-ASCII space characters [StringPrep, C.1.2] that can be
    #     mapped to SPACE (U+0020), and
    #
    #  -  the 'commonly mapped to nothing' characters [StringPrep, B.1]
    #     that can be mapped to nothing.
    mapped = []
    for char in text:
        if stringprep.in_table_c12(char):
            mapped.append(u' ')
        elif not stringprep.in_table_b1(char):
            mapped.append(char)

    # Normalization using form KC
    text = unicodedata.normalize('NFKC', u''.join(mapped))

    # Check for bidirectional string
    first_is_randal = False
    if text:
        first_is_randal = stringprep.in_table_d1(text[0])
        if first_is_randal and not stringprep.in_table_d1(text[-1]):
            raise SASLPrepFailure('Section 6.3 [end]')

    # Check for prohibited characters; tables are tried in this order.
    prohibited_tables = (
        (stringprep.in_table_c12, 'In table C.1.2'),
        (stringprep.in_table_c21, 'In table C.2.1'),
        (stringprep.in_table_c22, 'In table C.2.2'),
        (stringprep.in_table_c3, 'In table C.3'),
        (stringprep.in_table_c4, 'In table C.4'),
        (stringprep.in_table_c5, 'In table C.5'),
        (stringprep.in_table_c6, 'In table C.6'),
        (stringprep.in_table_c7, 'In table C.7'),
        (stringprep.in_table_c8, 'In table C.8'),
        (stringprep.in_table_c9, 'In table C.9'),
    )
    for index, char in enumerate(text):
        if strict and stringprep.in_table_a1(char):
            raise SASLPrepFailure('Unassigned Codepoint')
        for in_table, message in prohibited_tables:
            if in_table(char):
                raise SASLPrepFailure(message)
        if index:
            if first_is_randal and stringprep.in_table_d2(char):
                raise SASLPrepFailure('Section 6.2')
            # The last character is not exempt: a string that does not
            # start with a RandALCat character may not contain one at
            # all, so e.g. "a" followed by a Hebrew letter is rejected.
            if not first_is_randal and stringprep.in_table_d1(char):
                raise SASLPrepFailure('Section 6.3')

    return text
Exemplo n.º 9
0
	def check_unassigneds(self, string):
		"""Raise UnicodeError for the first character found in table A.1.

		Every character of ``string`` is looked up with
		``stringprep.in_table_a1``. Nothing is returned when no character
		matches.
		"""
		for c in string:
			if stringprep.in_table_a1(c):
				# Call form of raise: valid on both Python 2 and Python 3,
				# unlike the old ``raise UnicodeError, "..."`` statement.
				raise UnicodeError("Unassigned code point %s" % repr(c))
Exemplo n.º 10
0
def prohibited_output_profile(string):
    """Check a string against the RFC4013 prohibited-output rules.

    Every character is tested, in order, against the prohibited tables
    (C.1.2, C.2.1/C.2.2, C.3 through C.9), then against the bidirectional
    table chosen for the whole string, then against table A.1 (unassigned
    code points). The first match raises ValueError; nothing is returned
    when the string passes.

    The bidirectional table is D.2 when is_ral_string(string) is true and
    D.1 otherwise. is_ral_string is defined elsewhere; presumably it
    reports whether the string holds RandALCat characters -- confirm.
    """
    # Implements:
    # RFC4013, 2.3. Prohibited Output.
    # RFC4013, 2.4. Bidirectional Characters.
    # RFC4013, 2.5. Unassigned Code Points.

    # Determine how to handle bidirectional characters (RFC3454):
    # a RandALCat string MUST NOT contain LCat characters (D.2); any
    # other string is refused RandALCat characters (D.1).
    if is_ral_string(string):
        is_prohibited_bidi_ch, bidi_table = in_table_d2, 'D.2'
    else:
        is_prohibited_bidi_ch, bidi_table = in_table_d1, 'D.1'

    RFC = 'RFC4013'

    # RFC4013 2.3. Prohibited Output, tried in this order per character.
    prohibited_output = (
        (in_table_c12, '%s: prohibited non-ASCII space characters '
                       'that cannot be replaced (C.1.2).'),
        (in_table_c21_c22, '%s: prohibited control characters (C.2.1).'),
        (in_table_c3, '%s: prohibited private Use characters (C.3).'),
        (in_table_c4, '%s: prohibited non-character code points (C.4).'),
        (in_table_c5, '%s: prohibited surrogate code points (C.5).'),
        (in_table_c6, '%s: prohibited inappropriate for plain text '
                      'characters (C.6).'),
        (in_table_c7, '%s: prohibited inappropriate for canonical '
                      'representation characters (C.7).'),
        (in_table_c8, '%s: prohibited change display properties / '
                      'deprecated characters (C.8).'),
        (in_table_c9, '%s: prohibited tagging characters (C.9).'),
    )

    for c in string:
        for is_prohibited, template in prohibited_output:
            if is_prohibited(c):
                raise ValueError(template % RFC)

        # RFC4013, 2.4. Bidirectional Characters:
        if is_prohibited_bidi_ch(c):
            raise ValueError('%s: prohibited bidi characters (%s).' %
                             (RFC, bidi_table))

        # RFC4013, 2.5. Unassigned Code Points:
        if in_table_a1(c):
            raise ValueError('%s: prohibited unassigned code points (A.1).' %
                             RFC)
Exemplo n.º 11
0
def saslprep(text, strict=True):
    """
    Return a processed version of the given string, using the SASLPrep
    profile of stringprep.

    The string is mapped (table C.1.2 characters become a space, table
    B.1 characters are removed), normalized with NFKC, and then checked
    for prohibited output and for the bidirectional rules: a string with
    a RandALCat character (table D.1) must start and end with one and
    must not contain any LCat character (table D.2).

    :param text: The string to process, in UTF-8.
    :param strict: If ``True``, prevent the use of unassigned code points.
    :return: The prepared string.
    :raises SASLPrepFailure: If the string contains an unassigned (when
        ``strict``) or prohibited character, or breaks the bidirectional
        rules.
    """

    # Byte strings only need decoding on Python 2.
    if sys.version_info < (3, 0) and isinstance(text, str):
        text = text.decode("utf-8")

    # Mapping:
    #
    #  -  non-ASCII space characters [StringPrep, C.1.2] that can be
    #     mapped to SPACE (U+0020), and
    #
    #  -  the 'commonly mapped to nothing' characters [StringPrep, B.1]
    #     that can be mapped to nothing.
    mapped = []
    for char in text:
        if stringprep.in_table_c12(char):
            mapped.append(u" ")
        elif not stringprep.in_table_b1(char):
            mapped.append(char)

    # Normalization using form KC
    text = unicodedata.normalize("NFKC", u"".join(mapped))

    # Check for bidirectional string
    first_is_randal = False
    if text:
        first_is_randal = stringprep.in_table_d1(text[0])
        if first_is_randal and not stringprep.in_table_d1(text[-1]):
            raise SASLPrepFailure("Section 6.3 [end]")

    # Check for prohibited characters; tables are tried in this order.
    prohibited_tables = (
        (stringprep.in_table_c12, "In table C.1.2"),
        (stringprep.in_table_c21, "In table C.2.1"),
        (stringprep.in_table_c22, "In table C.2.2"),
        (stringprep.in_table_c3, "In table C.3"),
        (stringprep.in_table_c4, "In table C.4"),
        (stringprep.in_table_c5, "In table C.5"),
        (stringprep.in_table_c6, "In table C.6"),
        (stringprep.in_table_c7, "In table C.7"),
        (stringprep.in_table_c8, "In table C.8"),
        (stringprep.in_table_c9, "In table C.9"),
    )
    for index, char in enumerate(text):
        if strict and stringprep.in_table_a1(char):
            raise SASLPrepFailure("Unassigned Codepoint")
        for in_table, message in prohibited_tables:
            if in_table(char):
                raise SASLPrepFailure(message)
        if index:
            if first_is_randal and stringprep.in_table_d2(char):
                raise SASLPrepFailure("Section 6.2")
            # The last character is not exempt: a string that does not
            # start with a RandALCat character may not contain one at
            # all, so e.g. "a" followed by a Hebrew letter is rejected.
            if not first_is_randal and stringprep.in_table_d1(char):
                raise SASLPrepFailure("Section 6.3")

    return text
Exemplo n.º 12
0
 def update_event(self, inp=-1):
     """Write the table A.1 lookup of input 0 to output 0.

     The value read with ``self.input(0)`` is passed to
     ``stringprep.in_table_a1`` and the result is stored with
     ``self.set_output_val(0, ...)``. ``inp`` is not used here.
     """
     value = self.input(0)
     is_unassigned = stringprep.in_table_a1(value)
     self.set_output_val(0, is_unassigned)
Exemplo n.º 13
0
import stringprep
import unicodedata

# Demo: compare a code point that has a category with one that does not,
# then print a few sample characters.
assigned = u'\ua62b'
print(assigned)
print(unicodedata.category(assigned))  # Lo
print(stringprep.in_table_a1(assigned))
print(stringprep.in_table_a1(chr(0x1F475)))

# U+A62C is not defined, so unicodedata.name(u"\ua62c") is not usable here.
unassigned = u'\ua62c'
print(unassigned)
print(stringprep.in_table_a1(unassigned))
print(unicodedata.category(unassigned))  # Cn

# Background reading:
# https://baike.baidu.com/item/Unicode/750500?fr=aladdin
# https://home.unicode.org/#
# The last sample is the "100" emoji; U+03C0 is printed twice, once from
# an escape and once from chr().
for sample in (u'\u4e00', u'\uA755', u'\u03c0', chr(0x03c0), chr(0x1F4AF)):
    print(sample)

Exemplo n.º 14
0
 def check_unassigned(unistr):
     """Reject a string that contains a character from table A.1.

     Characters are tested one by one with ``stringprep.in_table_a1``;
     the first match raises ``IdentifierError`` built from
     ``IdentifierErrors.restriction_unassigned``. Nothing is returned
     when no character matches.
     """
     if any(stringprep.in_table_a1(c) for c in unistr):
         raise IdentifierError(IdentifierErrors.restriction_unassigned)
Exemplo n.º 15
0
 def __is_unassigned_code_point(char: str):
     """Return the ``stringprep.in_table_a1`` lookup result for ``char``."""
     unassigned = stringprep.in_table_a1(char)
     return unassigned
Exemplo n.º 16
0
 def check_unassigneds(self, string):
     """Raise UnicodeError for the first character found in table A.1.

     The characters of ``string`` are looked up in order with
     ``stringprep.in_table_a1``. Nothing is returned when none matches.
     """
     # in_table_a1 only accepts characters, so None can mark "no hit".
     offender = next((c for c in string if stringprep.in_table_a1(c)), None)
     if offender is not None:
         raise UnicodeError("Unassigned code point %s" % repr(offender))
Exemplo n.º 17
0
def saslprep(s, allow_unassigned=False):
    '''
    Apply SASLprep (RFC 4013, the stringprep profile for user names and
    passwords) to the Unicode string s and return the prepared string.

    Processing order: mapping, NFKC normalization, prohibited-output
    check, bidirectional check, unassigned code point check. The last
    check is skipped when allow_unassigned is True, which is intended
    for query terms and other non-storing situations only.

    TypeError is raised when s is not a Unicode string; ValueError is
    raised for any disallowed input.

    Characters are obtained through the unichars() helper, which is
    defined elsewhere (presumably it yields the code points of s --
    confirm).
    '''
    if type(s) != type(u''):
        raise TypeError("input must be a Unicode string")

    # phase 1: mapping -- table B.1 characters vanish, table C.1.2
    # spaces become a plain space
    kept = (ch for ch in unichars(s) if not stringprep.in_table_b1(ch))
    s = u''.join(
        u' ' if stringprep.in_table_c12(ch) else ch for ch in kept)

    # phase 2: normalization
    s = unicodedata.normalize('NFKC', s)

    # phase 3: prohibition -- first matching table, in this order, wins
    checks = (
        (stringprep.in_table_c12, "prohibited non-ASCII space character"),
        (stringprep.in_table_c21, "prohibited ASCII control character"),
        (stringprep.in_table_c22, "prohibited non-ASCII control character"),
        (stringprep.in_table_c3, "prohibited private use character"),
        (stringprep.in_table_c4, "prohibited non-character code point"),
        (stringprep.in_table_c5, "prohibited surrogate code point"),
        (stringprep.in_table_c6,
         "prohibited character inappropriate for plain text"),
        (stringprep.in_table_c7,
         "prohibited character inappropriate for canonical representation"),
        (stringprep.in_table_c8,
         "prohibited character changing display properties, or a deprecated character"),
        (stringprep.in_table_c9, "prohibited tagging character"),
    )
    for ch in unichars(s):
        for in_table, problem in checks:
            if in_table(ch):
                raise ValueError(problem)

    # phase 4: bidi check -- 'r' for table D.1, 'l' for table D.2,
    # 'x' for everything else
    def direction(ch):
        if stringprep.in_table_d1(ch):
            return 'r'
        if stringprep.in_table_d2(ch):
            return 'l'
        return 'x'

    bidi_map = [direction(ch) for ch in unichars(s)]
    if 'r' in bidi_map:
        if 'l' in bidi_map:
            raise ValueError(
                "prohibited mixture of strong left-to-right and right-to-left text"
            )
        starts_rtl = bidi_map[0] == 'r'
        ends_rtl = bidi_map[-1] == 'r'
        if not (starts_rtl and ends_rtl):
            raise ValueError(
                "string containing right-to-left text must start and end with right-to-left text"
            )

    # phase 5: unassigned check
    if allow_unassigned:
        return s
    for ch in unichars(s):
        if stringprep.in_table_a1(ch):
            raise ValueError("prohibited unassigned code point")
    return s