Пример #1
0
    def normcase(path):
        '''
        Return the form of a filename used for OS X-compatible comparison.

        The normalized form is obtained by:
        - escape-encoding invalid characters
        - decomposing to NFD
        - lowercasing
        - omitting ignored characters [200c-200f, 202a-202e, 206a-206f,feff]

        encoding.asciilower is tried first; when it raises
        UnicodeDecodeError (the path is not plain ASCII) the result of
        normcasefallback(path) is returned instead.

        >>> normcase('UPPER')
        'upper'
        >>> normcase('Caf\xc3\xa9')
        'cafe\\xcc\\x81'
        >>> normcase('\xc3\x89')
        'e\\xcc\\x81'
        >>> normcase('\xb8\xca\xc3\xca\xbe\xc8.JPG') # issue3918
        '%b8%ca%c3\\xca\\xbe%c8.jpg'
        '''

        try:
            normalized = encoding.asciilower(path)
        except UnicodeDecodeError:
            # asciilower rejected the path as non-ASCII: use the general
            # (slower) normalization instead
            normalized = normcasefallback(path)
        return normalized
Пример #2
0
    def normcase(path):
        '''
        Normalize a filename so it can be compared the way OS X does.

        Steps that make up the normalization:
        - escape-encode invalid characters
        - decompose to NFD
        - lowercase
        - omit ignored characters [200c-200f, 202a-202e, 206a-206f,feff]

        The ASCII-only case is handled by encoding.asciilower; any path
        for which that call raises UnicodeDecodeError is handed over to
        normcasefallback.

        >>> normcase('UPPER')
        'upper'
        >>> normcase('Caf\xc3\xa9')
        'cafe\\xcc\\x81'
        >>> normcase('\xc3\x89')
        'e\\xcc\\x81'
        >>> normcase('\xb8\xca\xc3\xca\xbe\xc8.JPG') # issue3918
        '%b8%ca%c3\\xca\\xbe%c8.jpg'
        '''

        try:
            asciiresult = encoding.asciilower(path)
        except UnicodeDecodeError:
            # not plain ASCII: delegate to the full normalization
            return normcasefallback(path)
        else:
            return asciiresult
Пример #3
0
    def normcase(path):
        '''
        Normalize a filename for OS X-compatible comparison:
        - escape-encode invalid characters
        - decompose to NFD
        - lowercase
        - omit ignored characters [200c-200f, 202a-202e, 206a-206f,feff]

        path is a byte string.  Pure-ASCII paths are handled by
        encoding.asciilower.  Any other path is decoded as UTF-8; if that
        fails, every byte that is not part of a well-formed UTF-8
        character is first replaced by its '%XX' escape.  This includes
        groups whose lead/continuation layout looks plausible but which
        the UTF-8 decoder rejects (overlong forms, code points above
        U+10FFFF), so the function no longer raises UnicodeDecodeError
        for such names.

        >>> normcase('UPPER')
        'upper'
        >>> normcase('Caf\xc3\xa9')
        'cafe\\xcc\\x81'
        >>> normcase('\xc3\x89')
        'e\\xcc\\x81'
        >>> normcase('\xb8\xca\xc3\xca\xbe\xc8.JPG') # issue3918
        '%b8%ca%c3\\xca\\xbe%c8.jpg'
        >>> normcase('\xe0\x80\x80') # overlong encoding
        '%e0%80%80'
        '''

        try:
            return encoding.asciilower(path)  # exception for non-ASCII
        except UnicodeDecodeError:
            pass
        try:
            u = path.decode('utf-8')
        except UnicodeDecodeError:
            # OS X percent-encodes any bytes that aren't valid utf-8

            def escape(chars):
                # '%XX' escape (uppercase hex) for every byte in chars
                return ''.join(["%%%02X" % ord(x) for x in chars])

            pieces = []  # output fragments, joined once after the loop
            group = ''   # bytes of the multi-byte character in progress
            pending = 0  # continuation bytes still expected for group
            for c in path:
                o = ord(c)
                if pending and not 128 <= o < 192:
                    # we want a continuation byte, but didn't get one:
                    # escape the partial character and rescan this byte
                    pieces.append(escape(group))
                    group = ''
                    pending = 0
                if pending:
                    # valid continuation
                    group += c
                    pending -= 1
                    if not pending:
                        # The byte layout is right, but the group may
                        # still be malformed (overlong, out of range);
                        # let the real decoder decide.
                        try:
                            group.decode('utf-8')
                        except UnicodeDecodeError:
                            group = escape(group)
                        pieces.append(group)
                        group = ''
                elif o < 128:
                    # ascii
                    pieces.append(c)
                elif 194 <= o < 245:
                    # valid leading bytes
                    if o < 224:
                        pending = 1
                    elif o < 240:
                        pending = 2
                    else:
                        pending = 3
                    group = c
                else:
                    # invalid
                    pieces.append("%%%02X" % o)

            # any remaining partial characters
            pieces.append(escape(group))
            u = ''.join(pieces).decode('utf-8')

        # Decompose then lowercase (HFS+ technote specifies lower)
        enc = unicodedata.normalize('NFD', u).lower().encode('utf-8')
        # drop HFS+ ignored characters
        return encoding.hfsignoreclean(enc)