def on_bookmark_created(message: PickleMessage, ctx: missive.HandlingContext):
    """Handle a freshly created bookmark.

    - request a crawl of the bookmarked url, if it hasn't been crawled yet
    - request a discussion fetch from every discussion source
    """
    created = cast(BookmarkCreated, message.get_obj())
    db_session = get_session(ctx)
    url = get_url_by_url_uuid(db_session, created.url_uuid)
    if url is None:
        raise RuntimeError("url requested to crawl does not exist in the db")
    routing_key = environ["QM_RABBITMQ_BG_WORKER_TOPIC"]
    if not is_crawled(db_session, url):
        crawl_request = CrawlRequest(
            request=Request(verb=HTTPVerb.GET, url=url),
            reason=BookmarkCrawlReason(),
        )
        publish_message(CrawlRequested(crawl_request=crawl_request), routing_key)
    for discussion_source in DiscussionSource:
        publish_message(
            FetchDiscussionsCommand(url_uuid=url.url_uuid, source=discussion_source),
            routing_key,
        )
    ctx.ack()
def test_publish_message(test_queue):
    """Round-trip check: a published message arrives on the queue unchanged."""
    outbound = BookmarkCreated(uuid4(), uuid4())
    publish_message(outbound, routing_key=test_queue.name)
    inbound = pickle.loads(test_queue.get().body)
    assert inbound == outbound
def reindex_url(url: str, log_level: str):
    """Request a (re)index of the most recent crawl of *url*.

    NOTE(review): log_level is accepted but never used in this function —
    confirm whether a configure_logging(log_level) call was intended, as in
    reindex_bookmarks.
    """
    url_obj = URL.from_string(url)
    Session = get_session_cls()
    with contextlib.closing(Session()) as session:
        crawl_uuid = get_most_recent_crawl(session, url_obj)
        publish_message(
            IndexRequested(crawl_uuid), environ["QM_RABBITMQ_BG_WORKER_TOPIC"]
        )
        log.info("requested index of %s (crawl_uuid: %s)", url_obj, crawl_uuid)
def request_indexes_for_unindexed_urls(session: Session) -> None:
    """Publish an IndexRequested event for every url without an index."""
    routing_key = environ["QM_RABBITMQ_BG_WORKER_TOPIC"]
    # count stays 0 when there are no unindexed urls; start=1 makes it equal
    # to the number of requests published
    count = 0
    for count, (url, crawl_uuid) in enumerate(get_unindexed_urls(session), start=1):
        publish_message(IndexRequested(crawl_uuid), routing_key)
        log.info("requested index: %s", url.to_string())
    log.info("requested %d indexes", count)
def fetch_frontier(limit: Optional[int]):
    """Request a discussion fetch for each entry in the discussion frontier."""
    Session = get_session_cls()
    routing_key = environ["QM_RABBITMQ_BG_WORKER_TOPIC"]
    fetches_requested = 0
    with contextlib.closing(Session()) as session:
        frontier = DiscussionFrontier(session)
        for url_uuid, discussion_source in frontier.iter(limit):
            publish_message(
                FetchDiscussionsCommand(url_uuid, discussion_source),
                routing_key=routing_key,
            )
            fetches_requested += 1
    log.info("requested %d fetches", fetches_requested)
def reindex_bookmarks(log_level: str):
    """Requests an (re)index of the most recent crawl for each bookmark.

    Configures logging at *log_level*, then publishes an IndexRequested
    event per successful bookmark crawl and logs how many were requested.
    """
    configure_logging(log_level)
    log.warning("requesting reindex of all bookmarks")
    Session = get_session_cls()
    index = 0
    with contextlib.closing(Session()) as session:
        # start=1 so `index` ends up equal to the number of requests actually
        # published; the previous default start of 0 under-reported the total
        # by one whenever any indexings were requested (cf.
        # request_indexes_for_unindexed_urls, which already uses start=1)
        for index, crawl_uuid in enumerate(
            most_recent_successful_bookmark_crawls(session), start=1
        ):
            publish_message(
                IndexRequested(crawl_uuid), environ["QM_RABBITMQ_BG_WORKER_TOPIC"]
            )
    log.warning("requested %d indexings", index)
def on_index_requested(message: PickleMessage, ctx: missive.HandlingContext):
    """Index the crawl named in the event; may trigger an icon message.

    The icon message (if any) is published only after the session is
    committed and the inbound message is acked.
    """
    event = cast(IndexRequested, message.get_obj())
    db_session = get_session(ctx)
    metadata = indexing.index(db_session, event.crawl_uuid)
    icon_message = icon_message_if_necessary(db_session, metadata) if metadata else None
    db_session.commit()
    ctx.ack()
    if icon_message:
        publish_message(icon_message, environ["QM_RABBITMQ_BG_WORKER_TOPIC"])
def send_hello(message, loop):
    """Publish a HelloEvent carrying *message*; loop forever if *loop* is set."""
    routing_key: str = environ["QM_RABBITMQ_BG_WORKER_TOPIC"]
    # called purely for side-effects - to ensure things are set up so that
    # the timing numbers are accurate
    get_producer()
    publish_message(HelloEvent(message), routing_key=routing_key)
    if loop:
        while True:
            publish_message(HelloEvent(message), routing_key=routing_key)
def on_bookmark_crawl_requested(message: PickleMessage, ctx: missive.HandlingContext):
    """Crawl the requested url, commit, then request an index of the result."""
    event = cast(CrawlRequested, message.get_obj())
    db_session = get_session(ctx)
    client = get_http_client(ctx)
    crawl_result = crawler.crawl(db_session, client, event.crawl_request.request)
    db_session.commit()
    publish_message(
        IndexRequested(crawl_uuid=crawl_result.crawl_uuid),
        environ["QM_RABBITMQ_BG_WORKER_TOPIC"],
    )
    ctx.ack()
def create_bookmark(username: str) -> flask.Response: owner = get_user_or_fail(db.session, username) # FIXME: sort out optional url_uuid require_access_or_fail( UserBookmarksAccessObject(user_uuid=owner.user_uuid), Access.WRITE, ) form = flask.request.form creation_time = datetime.utcnow().replace(tzinfo=timezone.utc) tag_triples = tag_triples_from_form(form) url_str = form["url"] try: # As it's a user entering this url, help them along with getting a # sufficiently canonicalised url url = URL.from_string(url_str, coerce_canonicalisation=True) except DisallowedSchemeException: log.warning("user tried to create url: %s (disallowed scheme)", url_str) flask.abort(400, "invalid url (disallowed scheme)") bookmark = Bookmark( url=url, title=form["title"], description=form["description"], unread="unread" in form, deleted=False, updated=creation_time, created=creation_time, tag_triples=tag_triples, ) url_uuid = set_bookmark(db.session, get_cache(), owner.user_uuid, bookmark) db.session.commit() publish_message( message_lib.BookmarkCreated(user_uuid=owner.user_uuid, url_uuid=url.url_uuid), environ["QM_RABBITMQ_BG_WORKER_TOPIC"], ) flask.flash("Bookmarked: %s" % bookmark.title) response = flask.make_response("Redirecting...", 303) response.headers["Location"] = flask.url_for( "quarchive.edit_bookmark_form", url_uuid=url_uuid, username=owner.username, ) return response
def sync(current_user: User) -> Tuple[flask.Response, int]:
    """Sync bookmarks with the client (browser extension).

    Accepts either jsonlines (one bookmark per line) or a single json
    object (deprecated), merges the received bookmarks into the db,
    publishes a BookmarkCreated event for each newly added bookmark, and
    responds in the same format as the request — with all of the user's
    bookmarks when "full" is in the query args, otherwise only the changed
    ones.  Aborts with 400 on a bad canonicalised url.
    """
    start_time = datetime.utcnow()
    extension_version = flask.request.headers.get(
        "Quarchive-Extension-Version", "unknown"
    )
    log.debug("extension version: %s", extension_version)
    user_uuid = current_user.user_uuid
    # any Content-Type other than application/json is treated as jsonlines
    use_jsonlines = flask.request.headers["Content-Type"] != "application/json"
    if not use_jsonlines:
        log.warning("sync request using deprecated single json object")
        body = flask.request.json
        recieved_bookmarks = (Bookmark.from_json(item) for item in body["bookmarks"])
    else:
        log.info("sync request using jsonlines")
        # lazy: lines are parsed as merge_bookmarks consumes the generator
        recieved_bookmarks = (
            Bookmark.from_json(json.loads(l)) for l in flask.request.stream.readlines()
        )
    try:
        merge_result = merge_bookmarks(db.session, user_uuid, recieved_bookmarks)
    except BadCanonicalisationException as e:
        log.error(
            "bad canonicalised url ('%s') from version %s, user %s",
            e.url_string,
            extension_version,
            current_user,
        )
        db.session.rollback()
        flask.abort(400, "bad canonicalisation on url: %s" % e.url_string)
    db.session.commit()
    # announce each newly added bookmark to the background workers
    for added in merge_result.added:
        publish_message(
            message_lib.BookmarkCreated(
                user_uuid=user_uuid, url_uuid=added.url.url_uuid
            ),
            environ["QM_RABBITMQ_BG_WORKER_TOPIC"],
        )
    is_full_sync = "full" in flask.request.args
    if is_full_sync:
        response_bookmarks = all_bookmarks(db.session, current_user.user_uuid)
    else:
        response_bookmarks = merge_result.changed

    # If we got JSON, send json back
    if not use_jsonlines:
        return flask.json.jsonify(
            {"bookmarks": [b.to_json() for b in response_bookmarks]}
        )
    else:

        def generator():
            # stream one bookmark per line (ndjson)
            for b in response_bookmarks:
                yield json.dumps(b.to_json())
                yield "\n"
            # the duration is logged from inside the generator so it covers
            # the whole response stream, not just the merge
            if is_full_sync:
                duration = datetime.utcnow() - start_time
                log.info(
                    "completed full sync for %s in %ds",
                    current_user.username,
                    duration.total_seconds(),
                )

        return (
            flask.Response(
                flask.stream_with_context(generator()),
                mimetype="application/x-ndjson",
            ),
            200,
        )
def fetch(url: str):
    """Request discussion fetches for *url* from HN and Reddit."""
    url_obj = URL.from_string(url)
    routing_key = environ["QM_RABBITMQ_BG_WORKER_TOPIC"]
    for source in (DiscussionSource.HN, DiscussionSource.REDDIT):
        publish_message(
            FetchDiscussionsCommand(url_obj.url_uuid, source),
            routing_key=routing_key,
        )