def slugify(value):
    """
    Turn ``value`` into a lowercase, ASCII-only, hyphen-separated slug.

    Markup tags are stripped first, then the text is passed through
    ``unidecode`` and NFKD-normalized. Every character that is not a word
    character, whitespace or a hyphen is removed, the result is trimmed and
    lowercased, and each run of hyphens/whitespace collapses into a single
    hyphen.

    Originally taken from the Django sources.

    :param value: text (possibly containing markup) to build the slug from
    :return: the slug as a text (unicode) string containing only ASCII
    """
    # TODO Maybe steal again from current Django 1.5dev
    value = Markup(value).striptags()

    # Imported lazily, as in the original implementation.
    import unicodedata
    from unidecode import unidecode

    # unidecode returns a native ``str`` on both Py2 and Py3; on Py2 that is
    # a byte string, so turn it back into unicode before going on.
    value = unidecode(value)
    if isinstance(value, six.binary_type):
        value = value.decode("ascii")

    value = unicodedata.normalize("NFKD", value)

    # Raw strings keep ``\w`` / ``\s`` as regex escapes instead of invalid
    # string escapes (which newer Python versions warn about).
    value = re.sub(r"[^\w\s-]", "", value).strip().lower()
    value = re.sub(r"[-\s]+", "-", value)

    # Drop anything that is still non-ASCII, but hand back text rather than
    # bytes: Pelican should generally use only unicode.
    value = value.encode("ascii", "ignore")
    return value.decode("ascii")
def slugify(value, substitutions=()):
    """
    Turn ``value`` into a lowercase, ASCII-only, hyphen-separated slug.

    Markup tags are stripped first, then the text is passed through
    ``unidecode``, NFKD-normalized and lowercased. The ``substitutions`` are
    applied next, after which every character that is not a word character,
    whitespace or a hyphen is removed and each run of hyphens/whitespace
    collapses into a single hyphen.

    Originally taken from the Django sources.

    :param value: text (possibly containing markup) to build the slug from
    :param substitutions: iterable of ``(src, dst)`` string pairs; every
        occurrence of ``src`` is replaced by ``dst`` (both lowercased, so
        matching is effectively case-insensitive) before the non-word
        characters are stripped
    :return: the slug as a text (unicode) string containing only ASCII
    """
    # TODO Maybe steal again from current Django 1.5dev
    value = Markup(value).striptags()

    # Imported lazily, as in the original implementation.
    import unicodedata
    from unidecode import unidecode

    # unidecode returns a native ``str`` on both Py2 and Py3; on Py2 that is
    # a byte string, so turn it back into unicode before going on.
    value = unidecode(value)
    if isinstance(value, six.binary_type):
        value = value.decode('ascii')

    # Lowercase up front so the literal substitutions below match
    # regardless of the case used in the input.
    value = unicodedata.normalize('NFKD', value).lower()
    for src, dst in substitutions:
        value = value.replace(src.lower(), dst.lower())

    # Raw strings keep ``\w`` / ``\s`` as regex escapes instead of invalid
    # string escapes (which newer Python versions warn about).
    value = re.sub(r'[^\w\s-]', '', value).strip()
    value = re.sub(r'[-\s]+', '-', value)

    # Drop anything that is still non-ASCII, but hand back text rather than
    # bytes: Pelican should generally use only unicode.
    value = value.encode('ascii', 'ignore')
    return value.decode('ascii')
def slugify(value, regex_subs=()):
    """
    Build a lowercase, ASCII-only slug from ``value``.

    Markup tags are stripped, the text is passed through ``unidecode`` and
    NFKD-normalized, and then every ``(pattern, replacement)`` pair from
    ``regex_subs`` is applied in order with ``re.sub`` (ignoring case).
    Finally the text is lowercased, reduced to ASCII and trimmed of
    surrounding whitespace.

    Note that this function itself does not remove punctuation or turn
    spaces into hyphens; that only happens if ``regex_subs`` contains
    patterns doing so.

    Originally taken from the Django sources.

    :param value: text (possibly containing markup) to build the slug from
    :param regex_subs: iterable of ``(pattern, replacement)`` pairs passed
        to ``re.sub`` with ``re.IGNORECASE``
    :return: the slug as a text (unicode) string containing only ASCII
    """
    # TODO Maybe steal again from current Django 1.5dev
    stripped = Markup(value).striptags()

    # Imported lazily, as in the original implementation.
    import unicodedata
    from unidecode import unidecode

    text = unidecode(stripped)
    if isinstance(text, bytes):
        # make sure we keep working on text, not bytes
        text = text.decode('ascii')
    text = unicodedata.normalize('NFKD', text)

    # caller-supplied rewrites, applied in the given order
    for pattern, replacement in regex_subs:
        text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)

    # lowercase, keep only ASCII, trim; the trim is done on the encoded
    # bytes, before decoding back to text
    ascii_bytes = text.lower().encode('ascii', 'ignore')
    return ascii_bytes.strip().decode('ascii')
def slugify(value, substitutions=()):
    """
    Turn ``value`` into a lowercase, ASCII-only slug.

    Markup tags are stripped first, then the text is passed through
    ``unidecode``, NFKD-normalized and lowercased. The ``substitutions``
    are applied next. Unless a substitution asks to skip it, every
    character that is not a word character, whitespace or a hyphen is then
    removed and each run of hyphens/whitespace collapses into one hyphen.

    Originally taken from the Django sources.

    :param value: text (possibly containing markup) to build the slug from
    :param substitutions: iterable of ``(src, dst)`` or
        ``(src, dst, skip)`` tuples. Every occurrence of ``src`` is
        replaced by ``dst`` (both lowercased). If a tuple with a true
        ``skip`` actually changed the text, the removal of non-alphanumeric
        characters is skipped and the text is only trimmed of surrounding
        whitespace. Two-element tuples behave as ``skip=False``.
    :return: the slug as a text (unicode) string containing only ASCII
    """
    # TODO Maybe steal again from current Django 1.5dev
    stripped = Markup(value).striptags()

    # Imported lazily, as in the original implementation.
    import unicodedata
    from unidecode import unidecode

    # unidecode returns a native ``str`` on both Py2 and Py3; on Py2 that is
    # a byte string, so turn it back into unicode before going on.
    text = unidecode(stripped)
    if isinstance(text, six.binary_type):
        text = text.decode('ascii')
    text = unicodedata.normalize('NFKD', text).lower()

    # Backward compatibility: widen legacy 2-tuples to 3-tuples up front, so
    # a malformed entry fails before any replacement is attempted.
    rules = []
    for entry in substitutions:
        try:
            src, dst, skip = entry
        except ValueError:
            (src, dst), skip = entry, False
        rules.append((src, dst, skip))

    # Non-alphanumeric characters are stripped unless an effective
    # substitution opted out of it.
    strip_non_alnum = True
    for src, dst, skip in rules:
        replaced = text.replace(src.lower(), dst.lower())
        if replaced != text and strip_non_alnum and skip:
            strip_non_alnum = False
        text = replaced

    if not strip_non_alnum:
        text = text.strip()
    else:
        text = re.sub(r'[^\w\s-]', '', text).strip()
        text = re.sub(r'[-\s]+', '-', text)

    # Drop anything that is still non-ASCII, but hand back text rather than
    # bytes: Pelican should generally use only unicode.
    return text.encode('ascii', 'ignore').decode('ascii')