def get_entries():
    """Yield one entry per post, walking pages from URL onwards.

    Each page is fetched with ``get_page``; every item in its ``'posts'``
    list is turned into an entry via ``get_entry`` and yielded.  Paging
    follows ``page['next_url']`` and stops when that value is falsy or when
    ``diff(start, last)`` is no longer below ``OLDEST_SEC``, where ``last``
    is the smallest ``entry['updated']`` seen so far.

    ``sleep()`` is called after every fetched entry, and once for a page
    that had no posts.
    """
    url = URL
    start = now()
    last = now()
    pages = 0
    while url and diff(start, last) < OLDEST_SEC:
        debug(u"diff {diff}", diff=diff(start, last))
        page = get_page(url)
        pages += 1
        found = page['posts']
        posts = len(found)
        # NOTE(review): `pages`, `url` and `posts` are named in the template
        # but not passed as keyword arguments; presumably info() resolves
        # them from this frame's locals -- confirm before renaming them.
        info(u"page {pages} {url} {posts} posts")
        for post in found:
            entry = get_entry(post['link'], post['prefix'], post['title'])
            yield entry
            # Track the minimum 'updated' value; it drives the loop cut-off.
            last = min(last, entry['updated'])
            sleep()
        if not found:
            # No per-entry sleep happened for this page, so pause once here.
            sleep()
        url = page['next_url']
    # NOTE(review): placement of this final sleep() (after the loop rather
    # than at the end of each iteration) was reconstructed from flattened
    # source -- confirm against the upstream file.
    sleep()
def post_time(ts):
    """Convert a post timestamp string into an ISOFMT-formatted string.

    A falsy ``ts`` (empty string, ``None``) yields whatever ``now()``
    returns.  Otherwise ``ts`` is parsed with ``POSTFMT``, passed through
    ``utc()`` and formatted with ``ISOFMT``.
    """
    if ts:
        parsed = datetime.datetime.strptime(ts, POSTFMT)
        # utc() is defined elsewhere; presumably it normalises the parsed
        # datetime to UTC -- verify against its definition.
        return utc(parsed).strftime(ISOFMT)
    return now()