def slugify(value, substitutions=()):
    """
    Normalizes string, converts to lowercase, removes non-alpha characters,
    and converts spaces to hyphens.

    Took from Django sources.

    :param value: text (possibly containing markup) to turn into a slug.
    :param substitutions: iterable of ``(src, dst)`` pairs. Both members are
        lower-cased and applied as plain (non-regex) string replacements
        before the non-word characters are removed.
    :return: text string containing only ASCII characters.
    """
    # TODO Maybe steal again from current Django 1.5dev
    value = Markup(value).striptags()
    # value must be unicode per se
    import unicodedata
    from unidecode import unidecode
    # unidecode returns str in Py2 and 3, so in Py2 we have to make
    # it unicode again
    value = unidecode(value)
    if isinstance(value, six.binary_type):
        value = value.decode('ascii')
    # still unicode
    value = unicodedata.normalize('NFKD', value).lower()
    for src, dst in substitutions:
        value = value.replace(src.lower(), dst.lower())
    # Raw strings: `\w` / `\s` are regex escapes, not Python string escapes.
    # In a non-raw literal they are invalid escape sequences, which newer
    # interpreters warn about.
    # Drop everything that is not a word character, whitespace or a hyphen.
    value = re.sub(r'[^\w\s-]', '', value).strip()
    # Collapse each run of hyphens/whitespace into a single hyphen.
    value = re.sub(r'[-\s]+', '-', value)
    # we want only ASCII chars
    value = value.encode('ascii', 'ignore')
    # but Pelican should generally use only unicode
    return value.decode('ascii')
def slugify(value, regex_subs=(), preserve_case=False, use_unicode=False):
    """
    Normalizes string, applies regex substitutions, optionally converts to
    lowercase, and strips surrounding whitespace.

    Took from Django sources.

    :param value: text (possibly containing markup) to turn into a slug.
    :param regex_subs: iterable of ``(pattern, replacement)`` pairs applied
        in order with ``re.sub`` (case-insensitively). When left at the
        default ``()``, ``DEFAULT_CONFIG['SLUG_REGEX_SUBSTITUTIONS']`` is
        used instead.
    :param preserve_case: when true, the result is not lower-cased.
    :param use_unicode: when true, the transliteration to ASCII via
        ``unidecode`` is skipped.
    :return: the slug, with leading/trailing whitespace removed.
    """
    if regex_subs == ():
        regex_subs = DEFAULT_CONFIG['SLUG_REGEX_SUBSTITUTIONS']

    import re
    import unicodedata

    import unidecode

    def normalize_unicode(text):
        # normalize text by compatibility composition
        # see: https://en.wikipedia.org/wiki/Unicode_equivalence
        return unicodedata.normalize('NFKC', text)

    # strip tags from value
    value = Markup(value).striptags()

    # normalization
    value = normalize_unicode(value)

    if not use_unicode:
        # ASCII-fy
        value = unidecode.unidecode(value)

    # perform regex substitutions; patterns and replacements get the same
    # normalization as the value so that they can match it
    for src, dst in regex_subs:
        value = re.sub(
            normalize_unicode(src),
            normalize_unicode(dst),
            value,
            flags=re.IGNORECASE)

    if not preserve_case:
        value = value.lower()

    # NOTE: a bare `value.replace(" ", "-")` used to sit here. Its result was
    # discarded (str is immutable), so it never affected the output and has
    # been removed. Any whitespace-to-hyphen conversion is therefore up to
    # `regex_subs` -- presumably the default substitutions cover it; verify
    # against DEFAULT_CONFIG.
    return value.strip()
def slugify(value, substitutions=()):
    """
    Build a lower-case, ASCII-only slug from ``value``.

    Markup is stripped, the text is transliterated and lower-cased, the
    given substitutions are applied, and then (unless a substitution asked
    to skip it) non-word characters are removed and runs of whitespace or
    hyphens are collapsed into single hyphens.

    Took from Django sources.

    :param value: text (possibly containing markup) to turn into a slug.
    :param substitutions: iterable of ``(src, dst)`` or ``(src, dst, skip)``
        tuples. ``src``/``dst`` are lower-cased and applied as plain string
        replacements. If a tuple with a true ``skip`` actually changes the
        text, the non-alphanumeric clean-up step is skipped entirely.
    :return: text string containing only ASCII characters.
    """
    # TODO Maybe steal again from current Django 1.5dev
    stripped = Markup(value).striptags()

    import unicodedata
    from unidecode import unidecode

    def _as_triple(entry):
        # Backward compatibility: promote legacy 2-tuples to 3-tuples.
        try:
            src, dst, skip = entry
        except ValueError:
            src, dst = entry
            skip = False
        return src, dst, skip

    # unidecode may hand back a byte string on Py2; make it text again.
    text = unidecode(stripped)
    if isinstance(text, six.binary_type):
        text = text.decode('ascii')
    text = unicodedata.normalize('NFKD', text).lower()

    # Normalize every rule up front so a malformed tuple fails before any
    # replacement is attempted.
    rules = tuple(_as_triple(entry) for entry in substitutions)

    # Non-alphanumeric characters are removed unless a matching rule opts out.
    strip_non_alnum = True
    for src, dst, skip in rules:
        replaced = text.replace(src.lower(), dst.lower())
        if replaced != text and skip:
            strip_non_alnum = False
        text = replaced

    if not strip_non_alnum:
        text = text.strip()
    else:
        text = re.sub(r'[^\w\s-]', '', text).strip()
        text = re.sub(r'[-\s]+', '-', text)

    # Keep only ASCII characters, but still return text rather than bytes.
    return text.encode('ascii', 'ignore').decode('ascii')
def convert_markdown(text, convert_url=None, *, inline=False):
    """
    Render Markdown ``text`` and return the stripped result as ``Markup``.

    :param text: Markdown source; it is dedented before rendering.
    :param convert_url: callable handed to ``Renderer``; an identity
        function is used when a falsy value is given. (Presumably it is
        applied to URLs found in the document -- confirm against Renderer.)
    :param inline: when true, a result that both starts with ``<p>`` and
        ends with ``</p>`` has that outer pair removed.
    :return: the rendered markup.
    """
    def _identity(x):
        return x

    if not convert_url:
        convert_url = _identity

    # Workaround for https://github.com/lepture/mistune/issues/125:
    # swap no-break spaces for a private-use placeholder while rendering.
    placeholder = '\uf8ff'
    prepared = dedent(text.replace('\N{NO-BREAK SPACE}', placeholder))

    render = Markdown(
        escape=False,
        block=BlockLexer(),
        renderer=Renderer(convert_url),
    )
    html = Markup(render(prepared)).strip()

    if inline:
        opening, closing = '<p>', '</p>'
        if html.startswith(opening) and html.endswith(closing):
            html = html[len(opening):-len(closing)]

    # Second half of the mistune workaround: restore the no-break spaces.
    return html.replace(placeholder, '\N{NO-BREAK SPACE}')