def canonical_url(url):
    """Return the canonical URL for ``url``, caching the result in memcache.

    The cache is consulted first. On a miss the page is fetched with
    ``fetch_decode`` and the canonical URL is extracted with
    ``canonical_url_in_html``; a truthy result is stored in memcache
    before being returned.

    Args:
        url: The URL to resolve.

    Returns:
        The cached or freshly extracted canonical URL, or None when the
        fetch yields nothing or no canonical URL is found.
    """
    # Build the per-URL key once so the lookup and the store agree.
    # Previously the store used the unformatted template, so every URL
    # wrote to the same key and lookups never hit.
    cache_key = "canonical_url(%s)" % url

    # check cache
    cached = memcache.get(cache_key)
    if cached:
        return cached

    # fetch content
    html = fetch_decode(url)
    if not html:
        return None

    # extract canonical url
    _url = canonical_url_in_html(html, url)
    if not _url:
        return None

    # cache for posterity
    memcache.set(cache_key, _url)
    return _url
def page_content(self):
    """Fetch ``self.url`` and return its cleaned content.

    Returns:
        The result of ``emend.html.clean`` applied to the fetched
        content, or None when ``fetch_decode`` returns a falsy value.
    """
    raw = fetch_decode(self.url)
    if not raw:
        return None
    # decode html entities, strip tags
    return emend.html.clean(raw)