def slugify(s, delete_chars=SlugDeleteChars, subst_char=SubstChar):
    """
    Build a slug from a text string.

    * Only Western-language strings with very basic accents are handled.

    :param s: text to convert
    :param delete_chars: characters that get swapped for ``subst_char``
    :param subst_char: replacement used for every unwanted character
    :returns: the lower-cased slug, without leading/trailing ``.`` or ``-``
        as produced by stripping ``.`` first and ``-`` second
    """
    from garage.html_utils import strip_tags, unescape

    # Trim line breaks at the edges, then flatten any that remain inside.
    text = s.strip("\r\n").replace("\n", " ")
    text = strip_tags(unescape(strip_accents(text)))

    # Possessive 's (straight or curly apostrophe) collapses to a plain s.
    text = re.sub(r"['’]s", 's', text)
    # "12.5%" becomes "12.5-percent".
    text = re.sub(r'([0-9\.]+)%', '\\1-percent', text)

    # Order matters: "&" and "/" introduce spaces that the later entry
    # turns into hyphens.
    for old, new in (("&", " and "), ("/", " "), (" ", "-"), ("_", "-")):
        text = text.replace(old, new)

    # Swap every unwanted character for the substitute.
    text = ''.join(subst_char if ch in delete_chars else ch for ch in text)

    # Squeeze runs of dots and hyphens down to a single character.
    text = re.sub(r'\.\.+', '.', text)
    text = re.sub(r'--+', '-', text)
    return text.strip('.').strip('-').lower()
def slugify(s, delete_chars=SlugDeleteChars, subst_char=SubstChar):
    """
    Convert a string to a slug.

    :param s: text to convert; a byte string is decoded as UTF-8, a
        unicode string is used as it is
    :param delete_chars: characters that get replaced by ``subst_char``
    :param subst_char: replacement used for every unwanted character
    :returns: the lower-cased unicode slug
    :raises UnicodeDecodeError: if ``s`` is a byte string that is not
        valid UTF-8
    """
    def convert_unwanted_chars(txt):
        # Swap every character listed in delete_chars for subst_char.
        return u''.join(
            subst_char if ch in delete_chars else ch for ch in txt)

    # Decode byte strings only. Calling ``decode`` on text that is already
    # unicode makes Python 2 ASCII-encode it implicitly first (which fails
    # on any non-ASCII character), and Python 3 ``str`` has no ``decode``.
    if isinstance(s, bytes):
        s = s.decode("utf-8")

    # Trim line breaks at the edges, then flatten any that remain inside.
    s = s.strip(u"\r\n")
    s = s.replace(u"\n", u" ")
    s = strip_accents(s)
    s = strip_tags(unescape(s))
    # Possessive 's collapses to a plain s.
    s = re.sub(r"[']s", u's', s)
    # "12.5%" becomes "12.5-percent".
    s = re.sub(r'([0-9\.]+)%', u'\\1-percent', s)
    # Order matters: "&" and "/" introduce spaces that are turned into
    # hyphens right after.
    s = s.replace(u"&", u" and ")
    s = s.replace(u"/", u" ")
    s = s.replace(u" ", u"-")
    s = s.replace(u"_", u"-")
    s = convert_unwanted_chars(s)
    # Squeeze runs of dots and hyphens down to a single character.
    s = re.sub(r'\.\.+', u'.', s)
    s = re.sub(r'--+', u'-', s)
    s = s.strip(u'.')
    s = s.strip(u'-')
    return s.lower()
def test_unescape(self):
    """
    Check that ``unescape`` returns the expected text for a mixed sample.

    The sample combines markup, double quotes, an ampersand, an accented
    character and CJK characters. Input, result and expectation are each
    reported through ``self._msg`` before the equality assertion.
    """
    from garage.html_utils import unescape

    self._msg('test', 'unescape', first=True)
    source_text = '<em>he said, "q & a"</em> <abc> écriture 寫作'
    # NOTE(review): the expectation is the same literal as the input, so
    # as written this only checks that text passes through unchanged --
    # confirm whether the input was meant to contain escaped entities.
    wanted = '<em>he said, "q & a"</em> <abc> écriture 寫作'
    actual = unescape(source_text)

    # Report in a fixed order: input, actual output, expected output.
    for label, value in (
            ('text', source_text),
            ('result', actual),
            ('expected', wanted)):
        self._msg(label, value)

    self.assertEqual(actual, wanted)