def slugify(value, regex_subs=()):
    """Normalize a string into a slug.

    Strips HTML tags, transliterates to ASCII, applies the given
    ``(pattern, replacement)`` regex substitutions, lowercases, and
    drops any remaining non-ASCII characters.
    Took from Django sources.
    """
    # TODO Maybe steal again from current Django 1.5dev
    import unicodedata

    from unidecode import unidecode

    # value must be unicode per se
    value = Markup(value).striptags()
    # unidecode returns str in Py2 and 3, so in Py2 we have to make
    # it unicode again
    value = unidecode(value)
    if isinstance(value, six.binary_type):
        value = value.decode('ascii')
    # still unicode
    value = unicodedata.normalize('NFKD', value)
    for pattern, replacement in regex_subs:
        value = re.sub(pattern, replacement, value, flags=re.IGNORECASE)
    # lowercase, keep only ASCII chars, but hand back unicode since
    # Pelican should generally use only unicode
    value = value.lower().encode('ascii', 'ignore').strip()
    return value.decode('ascii')
def slugify(value, regex_subs=()):
    """Normalize a string into a slug.

    Strips HTML tags, transliterates to ASCII, applies the given
    ``(pattern, replacement)`` regex substitutions, lowercases, and
    drops any remaining non-ASCII characters.
    Took from Django sources.
    """
    # TODO Maybe steal again from current Django 1.5dev
    import unicodedata

    from unidecode import unidecode

    # value must be unicode per se
    stripped = Markup(value).striptags()
    slug = unidecode(stripped)
    if isinstance(slug, bytes):
        slug = slug.decode('ascii')
    # still unicode
    slug = unicodedata.normalize('NFKD', slug)
    for pattern, replacement in regex_subs:
        slug = re.sub(pattern, replacement, slug, flags=re.IGNORECASE)
    # convert to lowercase, then keep only ASCII chars
    ascii_slug = slug.lower().encode('ascii', 'ignore').strip()
    # but Pelican should generally use only unicode
    return ascii_slug.decode('ascii')
def slugify(value, regex_subs=(), preserve_case=False, use_unicode=False):
    """Normalize a string into a slug.

    Strips HTML tags, normalizes unicode, optionally transliterates to
    ASCII, applies regex substitutions, and converts spaces to hyphens.
    Took from Django sources.

    :param value: string to slugify
    :param regex_subs: iterable of ``(pattern, replacement)`` pairs;
        falls back to ``DEFAULT_CONFIG['SLUG_REGEX_SUBSTITUTIONS']``
        when empty
    :param preserve_case: keep the original character case instead of
        lowercasing
    :param use_unicode: keep non-ASCII characters instead of
        transliterating them with unidecode
    """
    import re
    import unicodedata

    import unidecode

    # `not regex_subs` also covers an empty list, which the previous
    # `regex_subs == ()` check missed
    if not regex_subs:
        regex_subs = DEFAULT_CONFIG['SLUG_REGEX_SUBSTITUTIONS']

    def normalize_unicode(text):
        # normalize text by compatibility composition
        # see: https://en.wikipedia.org/wiki/Unicode_equivalence
        return unicodedata.normalize('NFKC', text)

    # strip tags from value
    value = Markup(value).striptags()

    # normalization
    value = normalize_unicode(value)

    if not use_unicode:
        # ASCII-fy
        value = unidecode.unidecode(value)

    # perform regex substitutions; patterns are normalized the same way
    # as the value so they match post-normalization text
    for src, dst in regex_subs:
        value = re.sub(normalize_unicode(src), normalize_unicode(dst),
                       value, flags=re.IGNORECASE)

    if not preserve_case:
        value = value.lower()

    # BUG FIX: str.replace returns a new string; the original discarded
    # the result, so spaces were never converted to hyphens.  Strip
    # first so leading/trailing whitespace does not become hyphens.
    return value.strip().replace(" ", "-")