Пример #1
0
    def test_proceed_backward_crawler(self):
        """A descending feed page schedules exactly one follow-up page.

        ``process_feed`` is called with ``offset=-1`` and ``descending=1``.
        The mocked response carries ``API_LIMIT`` items and reports
        ``next_page.offset == -2``.  The test pins the exact HTTP request and
        checks that a single follow-up task is scheduled with offset ``-2``,
        ``descending=1`` and the ``SERVER_ID`` cookie.
        """
        server_id = "a" * 32
        cookies = {'SERVER_ID': server_id}
        apply_async_mock = Mock()
        with patch("crawler.tasks.requests") as requests_mock:
            requests_mock.get.return_value = Mock(
                status_code=200,
                json=Mock(
                    return_value={
                        'data': [{
                            "id": uid
                        } for uid in range(API_LIMIT)],
                        'next_page': {
                            'offset': -2
                        },
                        'prev_page': {
                            'offset': 0
                        },
                    }),
                cookies=Mock(get_dict=Mock(return_value=cookies)),
            )
            # patch.object restores the real ``apply_async`` on exit; a plain
            # attribute assignment would leak the mock into later tests.
            with patch.object(process_feed, "apply_async", apply_async_mock):
                process_feed(offset=-1, descending=1, cookies=cookies)

            requests_mock.get.assert_called_once_with(
                FEED_URL_TEMPLATE.format(
                    host=PUBLIC_API_HOST,
                    version=API_VERSION,
                    resource="tenders",
                ),
                params=dict(feed="changes",
                            descending=1,
                            offset=-1,
                            mode="_all_",
                            limit=API_LIMIT,
                            opt_fields=",".join(TENDER_OPT_FIELDS)),
                cookies={'SERVER_ID': server_id},
                timeout=(CONNECT_TIMEOUT, READ_TIMEOUT),
            )

        # The mock keeps its recorded calls after the patch has been undone.
        apply_async_mock.assert_called_once_with(
            kwargs={
                'resource': 'tenders',
                'mode': '_all_',
                'offset': -2,
                'descending': 1,
                'cookies': {
                    'SERVER_ID': server_id
                }
            })
Пример #2
0
    def test_call_item_handlers(self, requests_mock):
        """Feed items reach only the handlers enabled by name in the config.

        Four handlers are registered on the builder, but only
        ``first_handler`` and ``second_handler`` appear in
        ``enabled_handlers_names`` (``"whatever"`` matches no handler).  Each
        enabled handler must be called once per feed item, in feed order; the
        other two must never be called.

        NOTE(review): ``requests_mock`` is presumably injected by a patch
        decorator that is not visible in this excerpt -- confirm.
        """
        first_id = "a" * 32
        second_id = "b" * 32
        response_cookies = {"some_cookie": "some_test_cookie"}

        feed_page = {
            "data": [{"id": first_id}, {"id": second_id}],
            "next_page": {"offset": 2},
        }
        response = Mock(
            status_code=200,
            json=Mock(return_value=feed_page),
            cookies=Mock(get_dict=Mock(return_value=response_cookies)),
        )
        requests_mock.get.return_value = response

        enabled = [
            Mock(__name__=name)
            for name in ("first_handler", "second_handler")
        ]
        disabled = [
            Mock(__name__=name)
            for name in ("third_handler", "fourth_handler")
        ]

        class Builder(ResourceConfigBuilder):
            handlers = enabled + disabled
            enabled_handlers_names = ["first_handler", "second_handler", "whatever"]
            opt_fields = ("first_field", "second_field")

        provider = ResourceConfigProvider()
        provider.register_builder("tenders", Builder())

        with patch("crawler.tasks.resources.configs", provider):
            process_feed(offset=1)

        # Built from fresh dicts so the comparison does not share objects
        # with the payload that was handed to the handlers.
        expected_calls = [call({"id": first_id}), call({"id": second_id})]
        for handler in enabled:
            self.assertEqual(handler.call_args_list, expected_calls)

        for handler in disabled:
            handler.assert_not_called()
Пример #3
0
    def test_start_crawler_on_empty_feed(self):
        """An empty initial feed response schedules a single delayed retry.

        ``process_feed`` is called without arguments; the mocked response has
        no items and an empty ``next_page.offset``.  The test pins the initial
        request (``descending="1"``, no offset, empty cookies) and checks that
        exactly one follow-up task is scheduled with ``countdown=60``,
        ``try_count=1`` and the cookies returned by the response.
        """
        server_id = "a" * 32
        cookies = {'SERVER_ID': server_id}
        apply_async_mock = Mock()
        with patch("crawler.tasks.requests") as requests_mock:
            requests_mock.get.return_value = Mock(
                status_code=200,
                cookies=Mock(get_dict=Mock(return_value=cookies)),
                json=Mock(return_value={
                    'data': [],
                    'next_page': {
                        'offset': ""
                    }
                }),
            )
            # patch.object restores the real ``apply_async`` on exit; a plain
            # attribute assignment would leak the mock into later tests.
            with patch.object(process_feed, "apply_async", apply_async_mock):
                process_feed()

            requests_mock.get.assert_called_once_with(
                FEED_URL_TEMPLATE.format(
                    host=PUBLIC_API_HOST,
                    version=API_VERSION,
                    resource="tenders",
                ),
                params=dict(feed="changes",
                            limit=API_LIMIT,
                            descending="1",
                            mode="_all_",
                            opt_fields=",".join(TENDER_OPT_FIELDS)),
                cookies={},
                timeout=(CONNECT_TIMEOUT, READ_TIMEOUT),
            )
        # The mock keeps its recorded calls after the patch has been undone.
        self.assertEqual(
            apply_async_mock.call_args_list, [
                call(countdown=60,
                     kwargs={
                         'resource': 'tenders',
                         'mode': '_all_',
                         'cookies': {
                             'SERVER_ID': server_id
                         },
                         'try_count': 1,
                     }),
            ],
            msg=
            "Only forward crawling after initialization on empty feed response"
        )
Пример #4
0
    def test_handle_feed_offset_error(self):
        """A 404 "offset expired/invalid" response triggers a retry without offset.

        The mocked API answers 404 with an error located at ``params.offset``.
        ``retry`` is replaced by a mock that raises ``Retry``; the test checks
        that the exception propagates and that ``retry`` was called once with
        kwargs containing only the cookies (the offset is not passed on).
        """
        cookies = {"SERVER_ID": "2" * 32}
        retry_mock = Mock(side_effect=Retry)
        with patch("crawler.tasks.requests") as requests_mock:
            requests_mock.get.return_value = Mock(
                status_code=404,
                json=Mock(
                    return_value={
                        "status":
                        "error",
                        "errors": [{
                            "location": "params",
                            "name": "offset",
                            "description": "Offset expired/invalid"
                        }]
                    }),
            )

            # patch.object restores the real ``retry`` on exit; a plain
            # attribute assignment would leave an always-raising ``retry`` on
            # the task for every later test.
            with patch.object(process_feed, "retry", retry_mock):
                with self.assertRaises(Retry):
                    process_feed(offset="1" * 32, cookies=cookies)

            retry_mock.assert_called_once_with(kwargs=dict(
                cookies=cookies))