def slugify(value, limit=None, default='', lower=True, dot_allowed=False):
    """Build a dash-separated slug from ``value``.

    The input is passed through ``smart_unicode``, filtered, transliterated
    with ``translify``, filtered again, stripped of leading/trailing dashes,
    optionally lower-cased, and finally has dash runs collapsed.

    :param value: source value to convert.
    :param limit: if not ``None``, the slug is cut to ``value[:limit]``.
        The cut happens after dashes were stripped, so the result may end
        with a dash.
    :param default: returned when the resulting slug is an empty string.
    :param lower: lower-case the slug when true.
    :param dot_allowed: use the ``*_DOT`` filter patterns instead of the
        plain ones (presumably these keep "." in the slug -- the patterns
        are defined elsewhere in the file).
    :returns: the slug, or ``default`` if the slug is empty.
    """
    text = smart_unicode(value)

    # Select the pre-/post-transliteration filters in one place.
    if dot_allowed:
        before_translit, after_translit = (
            RE_NOT_ENRUCHAR_DOT, RE_NOT_ENCHAR_DOT)
    else:
        before_translit, after_translit = RE_NOT_ENRUCHAR, RE_NOT_ENCHAR

    # Swap every non-allowed char for "-" first, so that pytils
    # does not crash on unexpected input.
    text = before_translit.sub('-', text)

    # Transliterate, then replace whatever trash is left with "-".
    text = after_translit.sub('-', translify(text))

    # Drop dashes from both ends of the string.
    slug = text.strip('-')
    if lower:
        slug = slug.lower()

    # Collapse sequences of dashes.
    slug = RE_DASH.sub('-', slug)

    if limit is not None:
        slug = slug[:limit]

    if slug == "":
        return default
    return slug
def normalize_url(url):
    """Return ``url`` with a non-ASCII network location IDNA-encoded.

    URLs for which ``RE_NOT_SAFE_URL`` finds nothing, and URLs whose network
    location has no match for ``RE_NON_ASCII``, are returned unchanged.

    :param url: URL string to normalize.
    :returns: the original URL, or the URL re-assembled with its network
        location converted through the ``idna`` codec.
    """
    # Quick check: when the whole URL consists of safe chars there is no
    # need to extract the hostname part and test whether it is an IDN.
    if not RE_NOT_SAFE_URL.search(url):
        return url

    pieces = list(urlsplit(url))
    netloc = pieces[1]
    if not RE_NON_ASCII.search(netloc):
        return url

    # NOTE(review): the whole netloc (not only the hostname) goes through
    # the idna codec -- confirm userinfo/port never accompany an IDN host.
    ascii_netloc = smart_unicode(netloc).encode('idna').decode()
    pieces[1] = str(ascii_netloc)
    return urlunsplit(pieces)
def __unicode__(self):
    """Return ``self.file`` converted with ``smart_unicode``."""
    text = smart_unicode(self.file)
    return text
def __unicode__(self):
    """Return ``self.name`` converted with ``smart_unicode``.

    Falls back to an empty unicode string when the converted value is falsy.
    """
    text = smart_unicode(self.name)
    if text:
        return text
    return u''