Пример #1
0
def canonical_url(url):
  """Return the canonical URL for `url`, caching the result in memcache.

  The cache is consulted first; on a miss the page is fetched via
  `fetch_decode` and handed to `canonical_url_in_html` together with the
  original URL.

  Args:
    url: the URL to look up.

  Returns:
    The cached or freshly extracted canonical URL, or None when the fetch
    yields nothing or no canonical URL is found.
  """
  # Build the per-URL key once so the read and the write use the same key.
  # (Previously the write used the unformatted template, so every URL
  # overwrote one shared entry and lookups never hit.)
  cache_key = "canonical_url(%s)" % url
  cached = memcache.get(cache_key)
  if cached:
    return cached
  # fetch content
  html = fetch_decode(url)
  if not html:
    return None
  # extract canonical url
  _url = canonical_url_in_html(html, url)
  if not _url:
    return None
  # cache for posterity
  memcache.set(cache_key, _url)
  return _url
Пример #2
0
 def page_content(self):
   """Return the cleaned content fetched from `self.url`.

   The result of `fetch_decode(self.url)` is passed through
   `emend.html.clean` (per the original comment: decodes HTML entities
   and strips tags). Returns None when the fetch yields nothing.
   """
   raw = fetch_decode(self.url)
   if not raw:
     return None
   return emend.html.clean(raw)