def redis_fetch_dailylist(date, host='localhost', port=6379, db=0):
    """Fetch the Zhihu Daily list page for *date* and cache it in Redis.

    Builds the list URL from the fixed API prefix plus the date string,
    downloads it via fetch_url_content, and stores the raw response body
    under the key *date*.

    Args:
        date: Date string used both as URL suffix and as the Redis key.
        host, port, db: Redis connection parameters; defaults preserve the
            original hard-coded connection (localhost:6379, db 0).

    Returns:
        True when content was fetched and cached, False otherwise.
    """
    prefix_url = 'http://news.at.zhihu.com/api/4/news/before/'
    cache = redis.Redis(host=host, port=port, db=db)
    url = ''.join([prefix_url, date])
    # port=80 / timeout=15 mirror the other crawler helpers in this project.
    content = fetch_url_content(url=url, port=80, timeout=15)
    if content:
        cache.set(date, content)
        return True
    # Empty/None content: nothing cached, report failure to the caller.
    return False
def redis_fetch_article(article_id, host='localhost', port=6379, db=1):
    """Fetch a single article by id and cache its body in Redis.

    Normalizes *article_id* to a string (it doubles as the Redis key),
    resolves the article URL via gen_article_url, downloads it, and stores
    the raw response body.

    Args:
        article_id: Article identifier; coerced to str before use.
        host, port, db: Redis connection parameters; defaults preserve the
            original hard-coded connection (localhost:6379, db 1 — note the
            daily-list cache uses db 0, articles use db 1).

    Returns:
        True when content was fetched and cached, False otherwise.
    """
    article_id = str(article_id)
    cache = redis.Redis(host=host, port=port, db=db)
    url = gen_article_url(article_id)
    # port=80 / timeout=15 mirror the other crawler helpers in this project.
    content = fetch_url_content(url=url, port=80, timeout=15)
    if content:
        cache.set(article_id, content)
        return True
    # Empty/None content: nothing cached, report failure to the caller.
    return False
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Scan the daily-list Redis cache for suspiciously short entries and
refetch them from the list URL.

Fix: the original used Python 2 ``print`` statements, which are syntax
errors on Python 3; they are converted to equivalent ``print()`` calls.
"""
import redis

from crawlerutils import fetch_url_content
from crawlerutils import gen_list_url


def get_invalid_dates(host, port, db):
    """Return cache entries that look invalid (body shorter than 10 bytes).

    Args:
        host, port, db: Redis connection parameters for the list cache.

    Returns:
        A list of ``[date_key, cached_content]`` pairs whose content is
        shorter than 10 characters — treated as a failed/empty fetch.
    """
    date_list = []
    cache = redis.Redis(host=host, port=port, db=db)
    for date in cache.keys():
        content = cache[date]
        # Real list pages are large; anything under 10 bytes is junk.
        if len(content) < 10:
            date_list.append([date, content])
    return date_list


if __name__ == '__main__':
    invalidset = get_invalid_dates(host='localhost', port=6379, db=0)
    print('invalidset: {0}'.format(invalidset))
    for item in invalidset:
        date = item[0]
        url = gen_list_url(date)
        content = fetch_url_content(url=url, port=80, timeout=15)
        # NOTE(review): the refetched content is only printed, never written
        # back to the cache — TODO confirm whether cache.set(date, content)
        # was intended here.
        print('refetch content:{content}'.format(content=content))