def normcasefallback(path):
    '''
    Normalize a filename that is not pure ASCII for OS X-compatible
    comparison.

    - bytes that do not form a valid utf-8 sequence are percent-encoded
    - the result is decomposed to NFD and lowercased
    - HFS+ ignored characters are dropped (encoding.hfsignoreclean)

    path is a byte string; a utf-8 encoded byte string is returned.
    '''
    try:
        u = path.decode('utf-8')
    except UnicodeDecodeError:
        # OS X percent-encodes any bytes that aren't valid utf-8
        parts = []
        pos = 0
        end = len(path)
        while pos < end:
            o = ord(path[pos])
            if o < 128:
                # ascii
                parts.append(path[pos])
                pos += 1
                continue

            # expected sequence length, judged from the leading byte
            if o < 194 or o >= 245:
                # continuation byte without a leader, or a byte that can
                # never start a sequence
                size = 0
            elif o < 224:
                size = 2
            elif o < 240:
                size = 3
            else:
                size = 4

            seq = path[pos:pos + size]
            try:
                # Let the codec judge the candidate: checking byte ranges
                # alone would accept overlong or out-of-range sequences
                # (e.g. '\xe0\x80\x80'), and the final decode below would
                # then raise instead of the bytes being escaped.
                seq.decode('utf-8')
            except UnicodeDecodeError:
                seq = ''

            if seq:
                parts.append(seq)
                pos += len(seq)
            else:
                # invalid: escape this byte only and rescan from the next
                parts.append('%%%02X' % o)
                pos += 1

        u = ''.join(parts).decode('utf-8')

    # Decompose then lowercase (HFS+ technote specifies lower)
    enc = unicodedata.normalize('NFD', u).lower().encode('utf-8')
    # drop HFS+ ignored characters
    return encoding.hfsignoreclean(enc)
def normcase(path):
    '''
    Normalize a filename for OS X-compatible comparison:
    - escape-encode invalid characters
    - decompose to NFD
    - lowercase
    - omit ignored characters [200c-200f, 202a-202e, 206a-206f,feff]

    >>> normcase('UPPER')
    'upper'
    >>> normcase('Caf\xc3\xa9')
    'cafe\\xcc\\x81'
    >>> normcase('\xc3\x89')
    'e\\xcc\\x81'
    >>> normcase('\xb8\xca\xc3\xca\xbe\xc8.JPG') # issue3918
    '%b8%ca%c3\\xca\\xbe%c8.jpg'
    '''

    try:
        # fast path: pure ASCII names only need lowercasing
        return encoding.asciilower(path)  # exception for non-ASCII
    except UnicodeDecodeError:
        # the full normalization (percent-encoding, NFD, lowercase,
        # dropping ignored characters) lives in normcasefallback rather
        # than being duplicated here
        return normcasefallback(path)
def _lowerclean(s):
    '''Lowercase s, then pass it through encoding.hfsignoreclean.'''
    lowered = s.lower()
    return encoding.hfsignoreclean(lowered)