def category_entries(category_id):
    cursor = Cursor(category_id)
    generator = None
    url_token, entry_after, read, starred = get_optional_args()
    if url_token:
        try:
            generator = get_entry_generator(url_token)
        except IteratorNotFound:
            pass
    else:
        url_token = text_type(now())
    if not generator:
        subscriptions = cursor.recursive_subscriptions
        generator = CategoryEntryGenerator()
        if entry_after:
            id_after, time_after = decode_entry_after(entry_after)
        else:
            time_after = None
            id_after = None
        for subscription in subscriptions:
            try:
                with get_stage() as stage:
                    feed = stage.feeds[subscription.feed_id]
            except KeyError:
                continue
            feed_title = text_type(feed.title)
            it = iter(feed.entries)
            feed_permalink = get_permalink(feed)
            child = FeedEntryGenerator(category_id, subscription.feed_id,
                                       feed_title, feed_permalink, it,
                                       now(), read, starred)
            generator.add(child)
        generator.set_generators(id_after, time_after)
    save_entry_generators(url_token, generator)
    tidy_generators_up()
    entries = generator.get_entries()
    if not entries or len(entries) < app.config['PAGE_SIZE']:
        next_url = None
        if not entries:
            remove_entry_generator(url_token)
    else:
        next_url = make_next_url(
            category_id, url_token,
            encode_entry_after(entries[-1]['entry_id'],
                               entries[-1]['updated']),
            read, starred
        )
    # FIXME: use Entry.updated_at instead of from json data.
    codec = Rfc3339()
    last_updated_at = ''
    if len(entries) and not entry_after:
        last_updated_at = max(codec.decode(x['updated'])
                              for x in entries).isoformat()
    return jsonify(
        title=category_id.split('/')[-1][1:] or app.config['ALLFEED'],
        entries=entries,
        read_url=url_for('read_all_entries', category_id=category_id,
                         last_updated=last_updated_at, _external=True),
        next_url=next_url
    )
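# encode_entry_after() / decode_entry_after() are used above but not shown
# in this section.  A minimal sketch of the pair, assuming the
# '<entry_id>@<updated>' token format that a later variant of this view
# builds inline via entry_after.split('@'):

def encode_entry_after(entry_id, updated):
    # Hypothetical helper: join the entry id and its timestamp with '@'
    # so the pair survives a round trip through the next_url query string.
    return '{0}@{1}'.format(entry_id, updated)


def decode_entry_after(entry_after):
    # Hypothetical inverse: split the token back into (id_after, time_after).
    id_after, time_after = entry_after.split('@', 1)
    return id_after, time_after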
def feed_entries(category_id, feed_id):
    stage = get_stage()
    Cursor(category_id)
    try:
        with stage:
            feed = stage.feeds[feed_id]
    except KeyError:
        r = jsonify(
            error='feed-not-found',
            message='Given feed does not exist'
        )
        r.status_code = 404
        return r
    url_token, entry_after, read, starred = get_optional_args()
    generator = None
    if url_token:
        try:
            generator = get_entry_generator(url_token)
        except IteratorNotFound:
            pass
    else:
        url_token = str(now())
    if not generator:
        it = iter(feed.entries)
        feed_title = clean_html(str(feed.title))
        feed_permalink = get_permalink(feed)
        generator = FeedEntryGenerator(category_id, feed_id, feed_title,
                                       feed_permalink, it, now(), read,
                                       starred)
        try:
            generator.set_iterator(entry_after)
        except StopIteration:
            return jsonify(
                title=generator.feed_title,
                entries=[],
                next_url=None
            )
    save_entry_generators(url_token, generator)
    tidy_generators_up()
    entries = generator.get_entries()
    if len(entries) < app.config['PAGE_SIZE']:
        next_url = None
        if not entries:
            remove_entry_generator(url_token)
    else:
        next_url = make_next_url(
            category_id, url_token, entries[-1]['entry_id'], read, starred,
            feed_id
        )
    return jsonify(
        title=clean_html(str(feed.title)),
        entries=entries,
        next_url=next_url
    )
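# get_optional_args() is called by every view in this section but defined
# elsewhere.  A sketch of a plausible implementation; the query-string
# parameter names are assumptions, not confirmed by this section:

def get_optional_args():
    # Hypothetical: read the pagination token, cursor, and filter flags
    # from the Flask request's query string, defaulting to None.
    url_token = request.args.get('url_token')
    entry_after = request.args.get('entry_after')
    read = request.args.get('read')
    starred = request.args.get('starred')
    return url_token, entry_after, read, starred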
def test_session_revise():
    doc = TestMergeableDoc()
    min_updated_at = now()
    session = Session()
    session.revise(doc)
    assert isinstance(doc.__revision__, Revision)
    assert doc.__revision__.session is session
    assert min_updated_at <= doc.__revision__.updated_at <= now()
    time.sleep(0.1)
    min_updated_at = now()
    session.revise(doc)
    assert min_updated_at <= doc.__revision__.updated_at <= now()
def test_stage_write(fx_repo, fx_session, fx_stage):
    doc = TestDoc()
    min_ts = now()
    with fx_stage:
        wdoc = fx_stage.write(['doc.{0}.xml'.format(fx_session.identifier)],
                              doc)
    assert wdoc.__revision__.session is fx_session
    assert min_ts <= wdoc.__revision__.updated_at <= now()
    xml = fx_repo.data['doc.{0}.xml'.format(fx_session.identifier)]
    read_doc = read(TestDoc, [xml])
    assert isinstance(read_doc, TestDoc)
    assert read_doc.__revision__ == wdoc.__revision__
def category_entries(category_id):
    cursor = Cursor(category_id)
    generator = None
    url_token, entry_after, read, starred = get_optional_args()
    if url_token:
        try:
            generator = get_entry_generator(url_token)
        except IteratorNotFound:
            pass
    else:
        url_token = str(now())
    if not generator:
        subscriptions = cursor.recursive_subscriptions
        generator = CategoryEntryGenerator()
        if entry_after:
            id_after, time_after = decode_entry_after(entry_after)
        else:
            time_after = None
            id_after = None
        for subscription in subscriptions:
            try:
                with get_stage() as stage:
                    feed = stage.feeds[subscription.feed_id]
            except KeyError:
                continue
            feed_id = get_hash(feed.id)
            feed_title = clean_html(str(feed.title))
            it = iter(feed.entries)
            feed_permalink = get_permalink(feed)
            child = FeedEntryGenerator(category_id, feed_id, feed_title,
                                       feed_permalink, it, now(), read,
                                       starred)
            generator.add(child)
        generator.set_generators(id_after, time_after)
    save_entry_generators(url_token, generator)
    tidy_generators_up()
    entries = generator.get_entries()
    if not entries or len(entries) < app.config['PAGE_SIZE']:
        next_url = None
        if not entries:
            remove_entry_generator(url_token)
    else:
        next_url = make_next_url(
            category_id, url_token,
            encode_entry_after(entries[-1]['entry_id'],
                               entries[-1]['updated']),
            read, starred
        )
    return jsonify(
        title=category_id.split('/')[-1][1:] or app.config['ALLFEED'],
        entries=entries,
        next_url=next_url
    )
def test_revision():
    session = Session()
    updated_at = now()
    rev = Revision(session, updated_at)
    assert rev == (session, updated_at)
    assert rev[0] is rev.session is session
    assert rev[1] == rev.updated_at == updated_at
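# test_revision() pins down the shape of Revision: it compares equal to a
# plain (session, updated_at) tuple and exposes both fields by index and
# by attribute.  A namedtuple satisfies every assertion; this is a sketch,
# not necessarily the library's actual definition:

import collections

Revision = collections.namedtuple('Revision', ['session', 'updated_at'])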
def tidy_generators_up():
    # Drop cached generators saved more than 30 minutes ago, then keep
    # only the 10 most recently saved of the survivors.
    global entry_generators
    generators = []
    for key, (it, time_saved) in entry_generators.items():
        if time_saved >= now() - datetime.timedelta(minutes=30):
            generators.append((key, (it, time_saved)))
    generators = sorted(generators,
                        key=lambda generator: generator[1][1],
                        reverse=True)
    entry_generators = dict(generators[:10])
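# A self-contained illustration of the eviction policy above: generators
# saved more than 30 minutes ago are dropped, and only the 10 most
# recently saved survivors are kept.  The iterators here are dummies.

import datetime

entry_generators = {}
for i in range(15):
    age = datetime.timedelta(minutes=40 if i == 0 else i)
    entry_generators['token{0}'.format(i)] = (iter([]), now() - age)

tidy_generators_up()
assert 'token0' not in entry_generators   # expired: saved 40 minutes ago
assert 'token14' not in entry_generators  # fresh, but outside the 10 newest
assert 'token1' in entry_generators       # the most recently saved survives
assert len(entry_generators) == 10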
def test_revision_set_contains(fx_revision_set):
    assert not fx_revision_set.contains(Revision(Session('key0'), now()))
    assert not fx_revision_set.contains(
        Revision(Session('key1'),
                 datetime.datetime(2013, 9, 27, 16, 54, 50, tzinfo=utc)))
    assert fx_revision_set.contains(
        Revision(Session('key1'),
                 datetime.datetime(2013, 9, 22, 16, 58, 57, tzinfo=utc)))
    assert fx_revision_set.contains(
        Revision(Session('key1'),
                 datetime.datetime(2012, 9, 22, 16, 58, 57, tzinfo=utc)))
    assert not fx_revision_set.contains(
        Revision(Session('key0'),
                 datetime.datetime(2012, 9, 22, 16, 58, 57, tzinfo=utc)))
def test_ensure_revision_pair():
    session = Session()
    updated_at = now()
    assert ensure_revision_pair((session, updated_at)) == \
        (session, updated_at)
    pair = ensure_revision_pair((session, updated_at), force_cast=True)
    assert isinstance(pair, Revision)
    assert pair == (session, updated_at)
    with raises(TypeError):
        ensure_revision_pair(())
    with raises(TypeError):
        ensure_revision_pair((session,))
    with raises(TypeError):
        ensure_revision_pair((session, updated_at, 1))
    with raises(TypeError):
        ensure_revision_pair(session)
    with raises(TypeError):
        ensure_revision_pair((session, 1))
    with raises(TypeError):
        ensure_revision_pair((1, updated_at))
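# test_ensure_revision_pair() fixes the contract: the argument must be a
# 2-tuple of (Session, datetime), anything else raises TypeError, and
# force_cast=True returns a Revision instead of the bare pair.  A sketch
# consistent with those assertions (not necessarily the real code):

def ensure_revision_pair(pair, force_cast=False):
    # Hypothetical validation: reject non-pairs and wrong element types.
    try:
        session, updated_at = pair
    except (TypeError, ValueError):
        raise TypeError('expected a (session, updated_at) pair')
    if not isinstance(session, Session):
        raise TypeError('session must be a Session instance')
    if not isinstance(updated_at, datetime.datetime):
        raise TypeError('updated_at must be a datetime.datetime')
    return Revision(session, updated_at) if force_cast else pair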
def category_entries(category_id):
    cursor = Cursor(category_id)
    generator = None
    url_token, entry_after, read, starred = get_optional_args()
    if url_token:
        try:
            generator = get_entry_generator(url_token)
        except IteratorNotFound:
            pass
    else:
        url_token = text_type(now())
    if not generator:
        subscriptions = cursor.recursive_subscriptions
        generator = CategoryEntryGenerator()
        if entry_after:
            id_after, time_after = entry_after.split('@')
        else:
            time_after = None
            id_after = None
        for subscription in subscriptions:
            try:
                with stage:
                    feed = stage.feeds[subscription.feed_id]
            except KeyError:
                continue
            feed_title = text_type(feed.title)
            it = iter(feed.entries)
            feed_permalink = get_permalink(feed)
            try:
                child = FeedEntryGenerator(category_id, subscription.feed_id,
                                           feed_title, feed_permalink, it,
                                           now(), read, starred)
            except StopIteration:
                continue
            generator.add(child)
        generator.set_generators(id_after, time_after)
    save_entry_generators(url_token, generator)
    tidy_generators_up()
    entries = generator.get_entries()
    if not entries or len(entries) < app.config['PAGE_SIZE']:
        next_url = None
        if not entries:
            remove_entry_generator(url_token)
    else:
        entry_after = entries[-1]['entry_id'] + '@' + entries[-1]['updated']
        next_url = make_next_url(category_id, url_token, entry_after,
                                 read, starred)
    # FIXME: use Entry.updated_at instead of from json data.
    codec = Rfc3339()
    last_updated_at = ''
    if len(entries) and not entry_after:
        last_updated_at = max(codec.decode(x['updated'])
                              for x in entries).isoformat()
    if worker.is_running():
        crawl_url = url_for('update_entries', category_id=category_id)
    else:
        crawl_url = None
    return jsonify(
        title=category_id.split('/')[-1][1:] or app.config['ALLFEED'],
        entries=entries,
        read_url=url_for('read_all_entries', category_id=category_id,
                         last_updated=last_updated_at, _external=True),
        crawl_url=crawl_url,
        next_url=next_url
    )
def feed_entries(category_id, feed_id):
    try:
        Cursor(category_id)
    except InvalidCategoryID:
        r = jsonify(
            error='category-id-invalid',
            message='Given category does not exist'
        )
        r.status_code = 404
        return r
    try:
        with stage:
            feed = stage.feeds[feed_id]
    except KeyError:
        r = jsonify(
            error='feed-not-found',
            message='Given feed does not exist'
        )
        r.status_code = 404
        return r
    if feed.__revision__:
        updated_at = feed.__revision__.updated_at
        if request.if_modified_since:
            if_modified_since = request.if_modified_since.replace(tzinfo=utc)
            last_modified = updated_at.replace(microsecond=0)
            if if_modified_since >= last_modified:
                return '', 304, {}  # Not Modified
    else:
        updated_at = None
    if worker.is_running():
        crawl_url = url_for('update_entries', category_id=category_id,
                            feed_id=feed_id)
    else:
        crawl_url = None
    url_token, entry_after, read, starred = get_optional_args()
    generator = None
    if url_token:
        try:
            generator = get_entry_generator(url_token)
        except IteratorNotFound:
            pass
    else:
        url_token = text_type(now())
    if not generator:
        it = iter(feed.entries)
        feed_title = text_type(feed.title)
        feed_permalink = get_permalink(feed)
        try:
            generator = FeedEntryGenerator(category_id, feed_id, feed_title,
                                           feed_permalink, it, now(), read,
                                           starred)
            generator.set_iterator(entry_after)
        except StopIteration:
            return jsonify(
                title=feed_title,
                entries=[],
                next_url=None,
                read_url=url_for(
                    'read_all_entries', feed_id=feed_id,
                    last_updated=(updated_at or now()).isoformat(),
                    _external=True
                ),
                crawl_url=crawl_url
            )
    save_entry_generators(url_token, generator)
    tidy_generators_up()
    entries = generator.get_entries()
    if len(entries) < app.config['PAGE_SIZE']:
        next_url = None
        if not entries:
            remove_entry_generator(url_token)
    else:
        next_url = make_next_url(
            category_id, url_token, entries[-1]['entry_id'], read, starred,
            feed_id
        )
    response = jsonify(
        title=text_type(feed.title),
        entries=entries,
        next_url=next_url,
        read_url=url_for('read_all_entries', feed_id=feed_id,
                         last_updated=(updated_at or now()).isoformat(),
                         _external=True),
        crawl_url=crawl_url
    )
    if feed.__revision__:
        response.last_modified = updated_at
    return response
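# Why the views above compare against updated_at.replace(microsecond=0):
# HTTP dates carry only second precision, so the If-Modified-Since value a
# client echoes back can never match a microsecond-bearing revision
# timestamp, and the 304 path would never trigger.  A small demonstration:

import datetime

revised = datetime.datetime(2013, 9, 22, 12, 0, 0, 500000, tzinfo=utc)
echoed = revised.replace(microsecond=0)  # what a client sends back
assert echoed < revised                  # naive compare: always "modified"
assert echoed >= revised.replace(microsecond=0)  # truncated compare: 304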
def save_entry_generators(url_token, generator):
    # Cache the generator alongside the time it was saved, so that
    # tidy_generators_up() can expire it later.
    entry_generators[url_token] = generator, now()
def test_now():
    before = datetime.datetime.utcnow().replace(tzinfo=utc)
    actual = now()
    after = datetime.datetime.utcnow().replace(tzinfo=utc)
    assert before <= actual <= after
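# test_now() effectively documents now() itself: it must return the current
# UTC time as a timezone-aware datetime.  An equivalent definition (a
# sketch; the library may implement it differently):

def now():
    return datetime.datetime.utcnow().replace(tzinfo=utc)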
def feed_entries(category_id, feed_id):
    stage = get_stage()
    Cursor(category_id)
    try:
        with stage:
            feed = stage.feeds[feed_id]
    except KeyError:
        r = jsonify(error='feed-not-found',
                    message='Given feed does not exist')
        r.status_code = 404
        return r
    if feed.__revision__:
        updated_at = feed.__revision__.updated_at
        if request.if_modified_since:
            if_modified_since = request.if_modified_since.replace(tzinfo=utc)
            last_modified = updated_at.replace(microsecond=0)
            if if_modified_since >= last_modified:
                return '', 304, {}  # Not Modified
    else:
        updated_at = None
    url_token, entry_after, read, starred = get_optional_args()
    generator = None
    if url_token:
        try:
            generator = get_entry_generator(url_token)
        except IteratorNotFound:
            pass
    else:
        url_token = text_type(now())
    if not generator:
        it = iter(feed.entries)
        feed_title = text_type(feed.title)
        feed_permalink = get_permalink(feed)
        generator = FeedEntryGenerator(category_id, feed_id, feed_title,
                                       feed_permalink, it, now(), read,
                                       starred)
        try:
            generator.set_iterator(entry_after)
        except StopIteration:
            return jsonify(
                title=generator.feed_title,
                entries=[],
                next_url=None,
                read_url=url_for(
                    'read_all_entries', feed_id=feed_id,
                    last_updated=(updated_at or now()).isoformat(),
                    _external=True
                )
            )
    save_entry_generators(url_token, generator)
    tidy_generators_up()
    entries = generator.get_entries()
    if len(entries) < app.config['PAGE_SIZE']:
        next_url = None
        if not entries:
            remove_entry_generator(url_token)
    else:
        next_url = make_next_url(category_id, url_token,
                                 entries[-1]['entry_id'], read, starred,
                                 feed_id)
    response = jsonify(
        title=text_type(feed.title),
        entries=entries,
        next_url=next_url,
        read_url=url_for('read_all_entries', feed_id=feed_id,
                         last_updated=(updated_at or now()).isoformat(),
                         _external=True)
    )
    if feed.__revision__:
        response.last_modified = updated_at
    return response