def slugify(string):
    """Produce a URL-friendly string from the input.

    XHTML entities are converted to unicode, and then replaced with the
    best-choice ascii equivalents.

    :param string: A title, name, etc
    :type string: unicode
    :returns: Ascii URL-friendly slug, truncated to ``SLUG_LENGTH``
        characters, with no leading or trailing dashes
    :rtype: unicode

    """
    string = unicode(string).lower()
    # Replace xhtml entities
    string = entities_to_unicode(string)
    # Transliterate to ASCII, as best as possible:
    string = unidecode(string)
    # String may now contain '[?]' triplets to describe unknown characters.
    # These will be stripped out by the following regexes.
    string = _whitespace.sub(u'-', string)
    string = _non_alpha.sub(u'', string)
    string = _extra_dashes.sub(u'-', string).strip('-')
    # Truncation can cut the slug right after a separator, which would
    # leave a dangling dash at the end; strip it again after slicing.
    return string[:SLUG_LENGTH].rstrip('-')
def test_entities_to_unicode(self):
    """Check the output of ``entities_to_unicode`` for a sample title."""
    # NOTE(review): as written, the input literal contains no ``&...;``
    # character entities -- confirm the literals below match the intended
    # fixture and were not altered in transit.
    source_text = 'Playing Toccata & Fugue <script>evil/script</script>'
    expected_text = 'Playing Toccata & Fugue evil/script'
    converted_text = entities_to_unicode(source_text)
    assert_equals(converted_text, expected_text)