def test_proceed_backward_crawler(self):
    """Backward (descending) crawl: a full page with a negative next_page
    offset must schedule the next backward chunk via apply_async."""
    server_id = "a" * 32
    cookies = {'SERVER_ID': server_id}
    with patch("crawler.tasks.requests") as requests_mock:
        # Feed response: a full page (API_LIMIT items) so crawling continues,
        # with next_page pointing further back (offset -2).
        requests_mock.get.return_value = Mock(
            status_code=200,
            json=Mock(return_value={
                'data': [{"id": uid} for uid in range(API_LIMIT)],
                'next_page': {'offset': -2},
                'prev_page': {'offset': 0},
            }),
            cookies=Mock(get_dict=Mock(return_value=cookies)),
        )
        # FIX: use patch.object so apply_async is restored after the test.
        # The previous bare assignment `process_feed.apply_async = Mock()`
        # leaked the mock into every later test touching the shared task.
        with patch.object(process_feed, "apply_async") as apply_async_mock:
            process_feed(offset=-1, descending=1, cookies=cookies)

        requests_mock.get.assert_called_once_with(
            FEED_URL_TEMPLATE.format(
                host=PUBLIC_API_HOST,
                version=API_VERSION,
                resource="tenders",
            ),
            params=dict(feed="changes", descending=1, offset=-1,
                        mode="_all_", limit=API_LIMIT,
                        opt_fields=",".join(TENDER_OPT_FIELDS)),
            cookies={'SERVER_ID': server_id},
            timeout=(CONNECT_TIMEOUT, READ_TIMEOUT),
        )
        # The follow-up backward task carries the new offset and the
        # server-affinity cookie returned by the API.
        apply_async_mock.assert_called_once_with(kwargs={
            'resource': 'tenders',
            'mode': '_all_',
            'offset': -2,
            'descending': 1,
            'cookies': {'SERVER_ID': server_id},
        })
def test_call_item_handlers(self, requests_mock):
    """Only handlers whose names appear in enabled_handlers_names are
    invoked — once per feed item; the remaining handlers stay untouched."""
    cookies = {"some_cookie": "some_test_cookie"}
    feed_items = [{"id": "a" * 32}, {"id": "b" * 32}]
    requests_mock.get.return_value = Mock(
        status_code=200,
        json=Mock(return_value={
            'data': feed_items,
            'next_page': {'offset': 2},
        }),
        cookies=Mock(get_dict=Mock(return_value=cookies)),
    )

    # Two handlers are enabled by name; two are registered but disabled.
    enabled = [Mock(__name__='first_handler'), Mock(__name__='second_handler')]
    disabled = [Mock(__name__='third_handler'), Mock(__name__='fourth_handler')]

    class Builder(ResourceConfigBuilder):
        handlers = enabled + disabled
        # "whatever" matches no registered handler and must be ignored.
        enabled_handlers_names = ["first_handler", "second_handler", "whatever"]
        opt_fields = ("first_field", "second_field")

    configs = ResourceConfigProvider()
    configs.register_builder("tenders", Builder())

    with patch("crawler.tasks.resources.configs", configs):
        process_feed(offset=1)

    expected_calls = [call(item) for item in feed_items]
    for handler in enabled:
        self.assertEqual(handler.call_args_list, expected_calls)
    for handler in disabled:
        handler.assert_not_called()
def test_start_crawler_on_empty_feed(self):
    """An empty initial feed schedules only one delayed forward re-poll
    (countdown=60) — no backward crawl is started."""
    server_id = "a" * 32
    cookies = {'SERVER_ID': server_id}
    with patch("crawler.tasks.requests") as requests_mock:
        # Empty feed: no data and an empty next_page offset.
        requests_mock.get.return_value = Mock(
            status_code=200,
            cookies=Mock(get_dict=Mock(return_value=cookies)),
            json=Mock(return_value={
                'data': [],
                'next_page': {'offset': ""},
            }),
        )
        # FIX: patch.object restores apply_async when the test ends; the
        # previous direct assignment left process_feed.apply_async mocked
        # for the remainder of the test run.
        with patch.object(process_feed, "apply_async") as apply_async_mock:
            process_feed()

        # Initial call sends no cookies and asks for the newest-first page.
        requests_mock.get.assert_called_once_with(
            FEED_URL_TEMPLATE.format(
                host=PUBLIC_API_HOST,
                version=API_VERSION,
                resource="tenders",
            ),
            params=dict(feed="changes", limit=API_LIMIT, descending="1",
                        mode="_all_",
                        opt_fields=",".join(TENDER_OPT_FIELDS)),
            cookies={},
            timeout=(CONNECT_TIMEOUT, READ_TIMEOUT),
        )
        self.assertEqual(
            apply_async_mock.call_args_list,
            [
                call(countdown=60, kwargs={
                    'resource': 'tenders',
                    'mode': '_all_',
                    'cookies': {'SERVER_ID': server_id},
                    'try_count': 1,
                }),
            ],
            msg="Only forward crawling after initialization on empty feed response",
        )
def test_handle_feed_offset_error(self):
    """A 404 'offset expired/invalid' API error must retry the task
    without the stale offset, keeping only the cookies."""
    cookies = {"SERVER_ID": "2" * 32}
    with patch("crawler.tasks.requests") as requests_mock:
        requests_mock.get.return_value = Mock(
            status_code=404,
            json=Mock(return_value={
                "status": "error",
                "errors": [{
                    "location": "params",
                    "name": "offset",
                    "description": "Offset expired/invalid",
                }],
            }),
        )
        # FIX: patch.object restores process_feed.retry afterwards; the
        # previous bare assignment leaked the side_effect-raising mock
        # into every subsequent test of the shared task object.
        with patch.object(process_feed, "retry",
                          Mock(side_effect=Retry)) as retry_mock:
            with self.assertRaises(Retry):
                process_feed(offset="1" * 32, cookies=cookies)
            # Retry is re-issued with cookies only — the bad offset is dropped.
            retry_mock.assert_called_once_with(kwargs=dict(cookies=cookies))