Python normalize 예제들, unicodedata.ucd_3_2_0.normalize Python 예제들

예제 #1

0

파일 보기

파일: stringprep.py 프로젝트: richarddennis/TRISIS-TRITON-HATMAN-malware

def map_table_b2(a):
    """Map ``a`` through ``map_table_b3`` with an NFKC stabilisation pass.

    ``a`` is mapped with ``map_table_b3`` and NFKC-normalized. Every
    character of that normalized text is mapped with ``map_table_b3``
    again and the result is normalized a second time. When the second
    pass yields different text, the second-pass text is returned;
    otherwise the plain ``map_table_b3`` mapping of ``a`` is returned.
    """
    folded = map_table_b3(a)
    first_pass = unicodedata.normalize('NFKC', folded)
    refolded = u''.join(map(map_table_b3, first_pass))
    second_pass = unicodedata.normalize('NFKC', refolded)
    # Only the second pass matters when it actually changed something.
    return folded if first_pass == second_pass else second_pass

예제 #2

0

파일 보기

def map_table_b2(a):
    """Return the case mapping of ``a`` that stays stable under NFKC.

    The input is mapped with ``map_table_b3`` and normalized to NFKC;
    the normalized characters are then mapped and normalized once more.
    If both normalized forms agree, the first ``map_table_b3`` result is
    returned unchanged, otherwise the re-normalized text is returned.
    """
    mapped = map_table_b3(a)
    normalized = unicodedata.normalize("NFKC", mapped)
    remapped = "".join(map_table_b3(char) for char in normalized)
    renormalized = unicodedata.normalize("NFKC", remapped)
    if normalized == renormalized:
        # The second pass changed nothing: the simple mapping is enough.
        return mapped
    return renormalized

예제 #3

0

파일 보기

파일: strprep.py 프로젝트: RestAuth/RestAuthCommon

def stringcheck(name):
    """Prepare ``name`` like :py:func:`stringprep`, rejecting invalid characters.

    Everything matched by ``prep_pattern`` is removed first. If
    ``check_pattern`` then matches anywhere in the remaining text,
    PreconditionFailed is raised; otherwise the text is returned
    NFC-normalized and lowercased.
    """
    stripped = prep_pattern.sub('', name)

    # Guard clause: bail out before doing any normalization work.
    if check_pattern.search(stripped) is not None:
        raise PreconditionFailed("Invalid characters")

    normalized = unicodedata.normalize('NFC', stripped)
    return normalized.lower()

예제 #4

0

파일 보기

def stringcheck(name):
    """Validating variant of :py:func:`stringprep`.

    Removes all ``prep_pattern`` matches from ``name`` and returns the
    result NFC-normalized and lowercased.

    Raises PreconditionFailed when ``check_pattern`` finds a match in the
    cleaned name.
    """
    cleaned = prep_pattern.sub('', name)
    has_invalid_chars = check_pattern.search(cleaned) is not None

    if has_invalid_chars:
        raise PreconditionFailed("Invalid characters")
    return unicodedata.normalize('NFC', cleaned).lower()

예제 #5

0

파일 보기

def nameprep(label):
    """Prepare ``label`` with the map / normalize / prohibit / bidi steps.

    Characters for which ``stringprep.in_table_b1`` is true are dropped,
    every other character is mapped with ``stringprep.map_table_b2``, and
    the result is NFKC-normalized.

    Returns the prepared label.

    Raises UnicodeError if the prepared label contains a character from
    one of the checked C tables (C.1.2, C.2.2, C.3 - C.9) or violates the
    bidirectional requirements.
    """
    # Map: drop B.1 characters, map the rest through B.2.
    label = u''.join(
        stringprep.map_table_b2(c)
        for c in label
        if not stringprep.in_table_b1(c)
    )

    # Normalize
    label = unicodedata.normalize('NFKC', label)

    # Prohibit: the predicates are tried in the same order as before.
    prohibited_tables = (
        stringprep.in_table_c12,
        stringprep.in_table_c22,
        stringprep.in_table_c3,
        stringprep.in_table_c4,
        stringprep.in_table_c5,
        stringprep.in_table_c6,
        stringprep.in_table_c7,
        stringprep.in_table_c8,
        stringprep.in_table_c9,
    )
    for c in label:
        if any(in_table(c) for in_table in prohibited_tables):
            raise UnicodeError('Invalid character %r' % c)

    # Check bidi. A real list is required here: on Python 3 ``map`` returns
    # a one-shot iterator that cannot be indexed, and a ``filter`` object is
    # always truthy, which made every right-to-left label fail.
    RandAL = [stringprep.in_table_d1(c) for c in label]
    if any(RandAL):
        # A string with a RandALCat character must not contain any LCat
        # character ...
        if any(stringprep.in_table_d2(c) for c in label):
            raise UnicodeError('Violation of BIDI requirement 2')
        # ... and must both start and end with a RandALCat character.
        if not RandAL[0] or not RandAL[-1]:
            raise UnicodeError('Violation of BIDI requirement 3')

    return label

예제 #6

0

파일 보기

파일: jid.py 프로젝트: abusesa/idiokit

def resourceprep(string):
    """Prepare ``string`` and return the prepared text.

    Characters for which ``stringprep.in_table_b1`` is true are removed,
    the remainder is NFKC-normalized, and the result is handed to
    ``check_prohibited_and_unassigned`` (with ``RESOURCEPREP_PROHIBITED``)
    and ``check_bidirectional`` before being returned.
    """
    kept = [ch for ch in string if not stringprep.in_table_b1(ch)]
    prepared = unicodedata.normalize("NFKC", u"".join(kept))

    # Both checks run on the normalized text, in this order.
    check_prohibited_and_unassigned(prepared, RESOURCEPREP_PROHIBITED)
    check_bidirectional(prepared)
    return prepared

예제 #7

0

파일 보기

def normalize(data, nfkc=True):
    """
    Optionally apply Unicode normalization form KC to ``data``.

    A profile can specify one of two options for Unicode normalization:
        - no normalization (``nfkc`` is false): ``data`` is returned as is
        - Unicode normalization with form KC (``nfkc`` is true, the default)
    """
    if not nfkc:
        return data
    return unicodedata.normalize('NFKC', data)

예제 #8

0

파일 보기

파일: jid.py 프로젝트: abusesa/idiokit

def nodeprep(string):
    """Prepare ``string`` and return the prepared text.

    Characters for which ``stringprep.in_table_b1`` is true are removed
    and every remaining character is mapped with
    ``stringprep.map_table_b2``. The joined result is NFKC-normalized and
    handed to ``check_prohibited_and_unassigned`` (with
    ``NODEPREP_PROHIBITED``) and ``check_bidirectional`` before being
    returned.
    """
    mapped = [
        stringprep.map_table_b2(ch)
        for ch in string
        if not stringprep.in_table_b1(ch)
    ]
    prepared = unicodedata.normalize("NFKC", u"".join(mapped))

    # Both checks run on the normalized text, in this order.
    check_prohibited_and_unassigned(prepared, NODEPREP_PROHIBITED)
    check_bidirectional(prepared)
    return prepared

예제 #9

0

파일 보기

파일: stringprep_profiles.py 프로젝트: 2M1R/SleekXMPP

def normalize(data, nfkc=True):
    """
    Return ``data``, NFKC-normalized unless ``nfkc`` is false.

    A profile can specify one of two options for Unicode normalization:
        - no normalization
        - Unicode normalization with form KC
    """
    return unicodedata.normalize('NFKC', data) if nfkc else data

예제 #10

0

파일 보기

파일: xmpp_stringprep.py 프로젝트: wellbehavedsoftware/wbs-graphite

 def prepare(self, string):
     """Run ``string`` through this profile's steps and return the result.

     The string is mapped with ``self.map`` and, when ``self.normalize``
     is truthy, NFKC-normalized. ``self.check_prohibiteds`` always runs
     on the result; ``self.check_unassigneds`` and
     ``self.check_bidirectionals`` run only when their ``do_check_*``
     flag is set.
     """
     prepared = self.map(string)
     prepared = (
         unicodedata.normalize("NFKC", prepared)
         if self.normalize
         else prepared
     )

     # Mandatory check first, then the optional ones in fixed order.
     self.check_prohibiteds(prepared)
     if self.do_check_unassigneds:
         self.check_unassigneds(prepared)
     if self.do_check_bidi:
         self.check_bidirectionals(prepared)
     return prepared

예제 #11

0

파일 보기

 def prepare(self, string):
     """Map, optionally normalize, and validate ``string``.

     Returns the text produced by ``self.map`` (NFKC-normalized when
     ``self.normalize`` is truthy) after passing it to
     ``self.check_prohibiteds`` and, depending on
     ``self.do_check_unassigneds`` / ``self.do_check_bidi``, to
     ``self.check_unassigneds`` and ``self.check_bidirectionals``.
     """
     text = self.map(string)

     if self.normalize:
         text = unicodedata.normalize("NFKC", text)

     # The prohibited-character check is unconditional.
     self.check_prohibiteds(text)

     # The remaining checks are opt-in via flags on the instance.
     if self.do_check_unassigneds:
         self.check_unassigneds(text)

     if self.do_check_bidi:
         self.check_bidirectionals(text)

     return text

예제 #12

0

파일 보기

파일: idna.py 프로젝트: RockySteveJobs/python-for-android

def nameprep(label):
    """Prepare ``label``: map, normalize, prohibit, then check bidi rules.

    Returns the prepared label. Raises UnicodeError when the prepared
    label contains a prohibited character or violates one of the
    bidirectional requirements.
    """
    # Step 1 - map: B.1 characters map to nothing, the rest go through B.2.
    mapped = [
        stringprep.map_table_b2(ch)
        for ch in label
        if not stringprep.in_table_b1(ch)
    ]

    # Step 2 - normalize.
    label = unicodedata.normalize("NFKC", "".join(mapped))

    # Step 3 - prohibit: reject the first character found in any C table.
    prohibited_tables = (
        stringprep.in_table_c12,
        stringprep.in_table_c22,
        stringprep.in_table_c3,
        stringprep.in_table_c4,
        stringprep.in_table_c5,
        stringprep.in_table_c6,
        stringprep.in_table_c7,
        stringprep.in_table_c8,
        stringprep.in_table_c9,
    )
    for ch in label:
        for in_table in prohibited_tables:
            if in_table(ch):
                raise UnicodeError("Invalid character %r" % ch)

    # Step 4 - bidi. The extra rules only apply once the string contains
    # at least one RandALCat character:
    # 1) The characters in section 5.8 MUST be prohibited (table C.8,
    #    already covered by step 3).
    is_randal = [stringprep.in_table_d1(ch) for ch in label]
    if any(is_randal):
        # 2) No LCat character may appear alongside a RandALCat character.
        if any(stringprep.in_table_d2(ch) for ch in label):
            raise UnicodeError("Violation of BIDI requirement 2")

        # 3) A RandALCat character MUST be both the first and the last
        #    character of the string.
        if not (is_randal[0] and is_randal[-1]):
            raise UnicodeError("Violation of BIDI requirement 3")

    return label

예제 #13

0

파일 보기

파일: idna.py 프로젝트: 2n1pp/Travel_Agency_Api

def nameprep(label):
    """Return ``label`` after mapping, NFKC normalization and validation.

    Raises UnicodeError for the first prohibited character found, or when
    a label containing a RandALCat character breaks the bidirectional
    requirements.
    """

    def _is_prohibited(ch):
        # Same C tables, same order, as one reusable predicate.
        return (
            stringprep.in_table_c12(ch)
            or stringprep.in_table_c22(ch)
            or stringprep.in_table_c3(ch)
            or stringprep.in_table_c4(ch)
            or stringprep.in_table_c5(ch)
            or stringprep.in_table_c6(ch)
            or stringprep.in_table_c7(ch)
            or stringprep.in_table_c8(ch)
            or stringprep.in_table_c9(ch)
        )

    # Map (B.1 characters map to nothing) and normalize.
    prepared = "".join(
        stringprep.map_table_b2(ch)
        for ch in label
        if not stringprep.in_table_b1(ch)
    )
    prepared = unicodedata.normalize("NFKC", prepared)

    # Prohibit
    offending = next((ch for ch in prepared if _is_prohibited(ch)), None)
    if offending is not None:
        raise UnicodeError("Invalid character %r" % offending)

    # Check bidi
    randal_flags = [stringprep.in_table_d1(ch) for ch in prepared]
    if not any(randal_flags):
        # No RandALCat character: no further tests are required.
        return prepared

    # There is a RandAL char in the string, so:
    # 1) The characters in section 5.8 MUST be prohibited; that is table
    #    C.8, which was already checked above.
    # 2) The string MUST NOT contain any LCat character.
    if any(stringprep.in_table_d2(ch) for ch in prepared):
        raise UnicodeError("Violation of BIDI requirement 2")

    # 3) A RandALCat character MUST be the first character of the string,
    #    and a RandALCat character MUST be the last character.
    starts_randal, ends_randal = randal_flags[0], randal_flags[-1]
    if not starts_randal or not ends_randal:
        raise UnicodeError("Violation of BIDI requirement 3")

    return prepared

예제 #14

0

파일 보기

def nameprep(label):
    """Prepare ``label`` with the map / normalize / prohibit / bidi steps.

    Characters for which ``stringprep.in_table_b1`` is true are dropped,
    every other character is mapped with ``stringprep.map_table_b2``, and
    the result is NFKC-normalized.

    Returns the prepared label.

    Raises UnicodeError if the prepared label contains a character from
    one of the checked C tables (C.1.2, C.2.2, C.3 - C.9) or violates the
    bidirectional requirements.
    """
    # Map. B.1 characters must be skipped; the previous ``pass`` fell
    # through and appended them anyway.
    newlabel = []
    for c in label:
        if stringprep.in_table_b1(c):
            continue
        newlabel.append(stringprep.map_table_b2(c))
    label = ''.join(newlabel)

    # Normalize
    label = unicodedata.normalize('NFKC', label)

    # Prohibit
    for c in label:
        if (
            stringprep.in_table_c12(c)
            or stringprep.in_table_c22(c)
            or stringprep.in_table_c3(c)
            or stringprep.in_table_c4(c)
            or stringprep.in_table_c5(c)
            or stringprep.in_table_c6(c)
            or stringprep.in_table_c7(c)
            or stringprep.in_table_c8(c)
            or stringprep.in_table_c9(c)
        ):
            raise UnicodeError('Invalid character %r' % c)

    # Check bidi. This is a one-off test, not a loop: the previous
    # ``while c:`` never terminated for a valid label containing a
    # RandALCat character.
    RandAL = [stringprep.in_table_d1(x) for x in label]
    if any(RandAL):
        # A string with a RandALCat character must not contain any LCat
        # character ...
        if any(stringprep.in_table_d2(x) for x in label):
            raise UnicodeError('Violation of BIDI requirement 2')
        # ... and must both start and end with a RandALCat character.
        if not RandAL[0] or not RandAL[-1]:
            raise UnicodeError('Violation of BIDI requirement 3')
    return label

예제 #15

0

파일 보기

파일: idna.py 프로젝트: connoryang/dec-eve-serenity

def nameprep(label):
    """Prepare ``label`` with the map / normalize / prohibit / bidi steps.

    Characters for which ``stringprep.in_table_b1`` is true are dropped,
    every other character is mapped with ``stringprep.map_table_b2``, and
    the result is NFKC-normalized.

    Returns the prepared label.

    Raises UnicodeError if the prepared label contains a character from
    one of the checked C tables (C.1.2, C.2.2, C.3 - C.9) or violates the
    bidirectional requirements.
    """
    # Map
    newlabel = [
        stringprep.map_table_b2(c)
        for c in label
        if not stringprep.in_table_b1(c)
    ]
    label = u''.join(newlabel)

    # Normalize
    label = unicodedata.normalize('NFKC', label)

    # Prohibit
    for c in label:
        if (
            stringprep.in_table_c12(c)
            or stringprep.in_table_c22(c)
            or stringprep.in_table_c3(c)
            or stringprep.in_table_c4(c)
            or stringprep.in_table_c5(c)
            or stringprep.in_table_c6(c)
            or stringprep.in_table_c7(c)
            or stringprep.in_table_c8(c)
            or stringprep.in_table_c9(c)
        ):
            raise UnicodeError('Invalid character %r' % c)

    # Check bidi. Build a list rather than calling ``map``: on Python 3
    # ``map`` yields a one-shot iterator that cannot be indexed, and a
    # ``filter`` object is truthy even when it would yield nothing.
    RandAL = [stringprep.in_table_d1(c) for c in label]
    if any(RandAL):
        # A string with a RandALCat character must not contain any LCat
        # character ...
        if any(stringprep.in_table_d2(c) for c in label):
            raise UnicodeError('Violation of BIDI requirement 2')
        # ... and must both start and end with a RandALCat character.
        if not RandAL[0] or not RandAL[-1]:
            raise UnicodeError('Violation of BIDI requirement 3')

    return label

예제 #16

0

파일 보기

파일: strprep.py 프로젝트: RestAuth/RestAuthCommon

def stringprep(name):
    """Normalize ``name`` to NFC, strip ``prep_pattern`` matches and lowercase it.

    ``prep_pattern`` presumably matches the stringprep B.1 characters (the
    original docstring says so); its definition is outside this block.
    """
    normalized = unicodedata.normalize('NFC', name)
    stripped = prep_pattern.sub('', normalized)
    return stripped.lower()

예제 #17

0

파일 보기

파일: stringutils.py 프로젝트: orbisvicis/Slugathon

 def normalize(unistr):
     """Return ``unistr`` converted to Unicode normalization form KC."""
     form = "NFKC"
     return unicodedata.normalize(form, unistr)

예제 #18

0

파일 보기

def stringprep(name):
    """Return ``name`` NFC-normalized, without ``prep_pattern`` matches, lowercased.

    NOTE(review): the original docstring describes the removed characters
    as stringprep B.1 characters; ``prep_pattern`` itself is defined
    outside this block.
    """
    nfc_name = unicodedata.normalize('NFC', name)
    without_mapped_chars = prep_pattern.sub('', nfc_name)
    lowered = without_mapped_chars.lower()
    return lowered

예제 #19

0

파일 보기

파일: sasl.py 프로젝트: FacoBackup/SPREADSHEET_BACKEND

def sasl_prep(data):
    """
    Prepare ``data`` with the SASLprep profile of stringprep (RFC 4013).

    The profile is designed for use in Simple Authentication and Security
    Layer ([SASL]) mechanisms, such as [PLAIN], [CRAM-MD5], and
    [DIGEST-MD5].  It may be applicable where simple user names and
    passwords are used.  This profile is not intended for use in
    preparing identity strings that are not simple user names (e.g.,
    email addresses, domain names, distinguished names), or where
    identity or password strings that are not character data, or require
    different handling (e.g., case folding).

    Steps: map (C.1.2 characters become SPACE, B.1 characters are
    dropped), normalize with NFKC, reject prohibited characters, then
    enforce the bidirectional rules.

    Returns the prepared string.

    Raises LDAPSASLPrepError if the string is empty after normalization,
    contains a prohibited character, or violates a bidirectional rule.
    """

    # mapping
    mapped = []
    for c in data:
        if stringprep.in_table_c12(c):
            # non-ASCII space characters [StringPrep, C.1.2] are mapped to SPACE (U+0020)
            mapped.append(' ')
        elif not stringprep.in_table_b1(c):
            # the "commonly mapped to nothing" characters [StringPrep, B.1] are dropped
            mapped.append(c)

    # normalizing
    # This profile specifies using Unicode normalization form KC
    # The repertoire is Unicode 3.2 as per RFC 4013 (2)
    prepared_data = unicode32.normalize('NFKC', ''.join(mapped))

    if not prepared_data:
        raise LDAPSASLPrepError('SASLprep error: unable to normalize string')

    # prohibit: (predicate, error message) pairs, tried in this order
    prohibited = (
        # Non-ASCII space characters [StringPrep, C.1.2]
        (stringprep.in_table_c12, 'SASLprep error: non-ASCII space character present'),
        # ASCII control characters [StringPrep, C.2.1]
        (stringprep.in_table_c21, 'SASLprep error: ASCII control character present'),
        # Non-ASCII control characters [StringPrep, C.2.2]
        (stringprep.in_table_c22, 'SASLprep error: non-ASCII control character present'),
        # Private Use characters [StringPrep, C.3]
        (stringprep.in_table_c3, 'SASLprep error: private character present'),
        # Non-character code points [StringPrep, C.4]
        (stringprep.in_table_c4, 'SASLprep error: non-character code point present'),
        # Surrogate code points [StringPrep, C.5]
        (stringprep.in_table_c5, 'SASLprep error: surrogate code point present'),
        # Inappropriate for plain text characters [StringPrep, C.6]
        (stringprep.in_table_c6, 'SASLprep error: inappropriate for plain text character present'),
        # Inappropriate for canonical representation characters [StringPrep, C.7]
        (stringprep.in_table_c7, 'SASLprep error: inappropriate for canonical representation character present'),
        # Change display properties or deprecated characters [StringPrep, C.8]
        (stringprep.in_table_c8, 'SASLprep error: change display property or deprecated character present'),
        # Tagging characters [StringPrep, C.9]
        (stringprep.in_table_c9, 'SASLprep error: tagging character present'),
    )
    for c in prepared_data:
        for is_prohibited, message in prohibited:
            if is_prohibited(c):
                raise LDAPSASLPrepError(message)

    # check bidi
    if any(stringprep.in_table_d1(c) for c in prepared_data):
        # if a string contains any r_and_al_cat character, the string MUST NOT contain any l_cat character.
        if any(stringprep.in_table_d2(c) for c in prepared_data):
            raise LDAPSASLPrepError('SASLprep error: string cannot contain (R or AL) and L bidirectional chars')

        # If a string contains any r_and_al_cat character, a r_and_al_cat character MUST be the first
        # character of the string and a r_and_al_cat character MUST be the last character of the string.
        # Both ends are tested against table D.1; the previous test compared the last character with
        # table D.2 and joined the halves with "and", so e.g. a trailing digit slipped through.
        starts_r_and_al = stringprep.in_table_d1(prepared_data[0])
        ends_r_and_al = stringprep.in_table_d1(prepared_data[-1])
        if not (starts_r_and_al and ends_r_and_al):
            raise LDAPSASLPrepError('r_and_al_cat character present, must be first and last character of the string')

    return prepared_data

예제 #20

0

파일 보기

파일: stringprep.py 프로젝트: migarbo1/SpyAgent

def do_normalization(chars):
    """
    Apply the stringprep normalization step (NFKC) to `chars` in place.

    `chars` is joined into one string, normalized, and its whole contents
    are then replaced by the characters of the normalized string. Nothing
    is returned.
    """
    joined = "".join(chars)
    normalized = unicodedata.normalize("NFKC", joined)
    # Slice assignment keeps the caller's list object and swaps its contents.
    chars[:] = list(normalized)

예제 #21

0

파일 보기

파일: sasl.py 프로젝트: cwaldbieser/python3-ldap

def sasl_prep(data):
    """
    Implement the SASLprep profile as per RFC 4013.

    RFC 4013 defines the "SASLprep" profile of the "stringprep" algorithm [StringPrep].
    The profile is designed for use in Simple Authentication and Security
    Layer ([SASL]) mechanisms, such as [PLAIN], [CRAM-MD5], and
    [DIGEST-MD5].  It may be applicable where simple user names and
    passwords are used.  This profile is not intended for use in
    preparing identity strings that are not simple user names (e.g.,
    email addresses, domain names, distinguished names), or where
    identity or password strings that are not character data, or require
    different handling (e.g., case folding).

    :param data: the string to prepare
    :return: the mapped and NFKC-normalized string
    :raises LDAPSASLPrepError: if the string is empty after normalization,
        contains a prohibited character, or violates a bidirectional rule
    """

    # mapping
    kept = []
    for c in data:
        if stringprep.in_table_c12(c):
            # non-ASCII space characters [StringPrep, C.1.2] that can be mapped to SPACE (U+0020)
            kept.append(' ')
        elif stringprep.in_table_b1(c):
            # the "commonly mapped to nothing" characters [StringPrep, B.1] that can be mapped to nothing.
            continue
        else:
            kept.append(c)
    # join once instead of growing a string character by character
    prepared_data = ''.join(kept)

    # normalizing
    # This profile specifies using Unicode normalization form KC
    # The repertoire is Unicode 3.2 as per RFC 4013 (2)

    prepared_data = unicode32.normalize('NFKC', prepared_data)

    if not prepared_data:
        raise LDAPSASLPrepError('SASLprep error: unable to normalize string')

    # prohibit
    for c in prepared_data:
        if stringprep.in_table_c12(c):
            # Non-ASCII space characters [StringPrep, C.1.2]
            raise LDAPSASLPrepError('SASLprep error: non-ASCII space character present')
        elif stringprep.in_table_c21(c):
            # ASCII control characters [StringPrep, C.2.1]
            raise LDAPSASLPrepError('SASLprep error: ASCII control character present')
        elif stringprep.in_table_c22(c):
            # Non-ASCII control characters [StringPrep, C.2.2]
            raise LDAPSASLPrepError('SASLprep error: non-ASCII control character present')
        elif stringprep.in_table_c3(c):
            # Private Use characters [StringPrep, C.3]
            raise LDAPSASLPrepError('SASLprep error: private character present')
        elif stringprep.in_table_c4(c):
            # Non-character code points [StringPrep, C.4]
            raise LDAPSASLPrepError('SASLprep error: non-character code point present')
        elif stringprep.in_table_c5(c):
            # Surrogate code points [StringPrep, C.5]
            raise LDAPSASLPrepError('SASLprep error: surrogate code point present')
        elif stringprep.in_table_c6(c):
            # Inappropriate for plain text characters [StringPrep, C.6]
            raise LDAPSASLPrepError('SASLprep error: inappropriate for plain text character present')
        elif stringprep.in_table_c7(c):
            # Inappropriate for canonical representation characters [StringPrep, C.7]
            raise LDAPSASLPrepError('SASLprep error: inappropriate for canonical representation character present')
        elif stringprep.in_table_c8(c):
            # Change display properties or deprecated characters [StringPrep, C.8]
            raise LDAPSASLPrepError('SASLprep error: change display property or deprecated character present')
        elif stringprep.in_table_c9(c):
            # Tagging characters [StringPrep, C.9]
            raise LDAPSASLPrepError('SASLprep error: tagging character present')

    # check bidi
    # if a string contains any r_and_al_cat character, the string MUST NOT contain any l_cat character.
    flag_r_and_al_cat = False
    flag_l_cat = False
    for c in prepared_data:
        if stringprep.in_table_d1(c):
            flag_r_and_al_cat = True
        elif stringprep.in_table_d2(c):
            flag_l_cat = True

        if flag_r_and_al_cat and flag_l_cat:
            raise LDAPSASLPrepError('SASLprep error: string cannot contain (R or AL) and L bidirectional chars')

    # If a string contains any r_and_al_cat character, a r_and_al_cat character MUST be the first character of the string
    # and a r_and_al_cat character MUST be the last character of the string.
    # Both ends must be in table D.1, so failing either one is an error. The previous condition tested the
    # last character against table D.2 and required both halves to fail, which let e.g. a trailing digit through.
    if flag_r_and_al_cat and (
            not stringprep.in_table_d1(prepared_data[0]) or not stringprep.in_table_d1(prepared_data[-1])):
        raise LDAPSASLPrepError('r_and_al_cat character present, must be first and last character of the string')

    return prepared_data