Пример #1
0
    def test_should_validate_properties_when_passing_url_and_text(self, dummy_data):
        """Build a response from a url and a text, then check its properties.

        The response is expected to echo the url and text, expose the text's
        bytes as content, and report empty headers and cookies.
        """
        url = 'http://foo.com'
        text = dummy_data.decode()

        response = BaseStaticResponse(url=url, text=text)

        assert url == response.url
        # No httpx response was supplied, so the test expects no metadata.
        assert_dicts({}, response.headers)
        assert_dicts({}, response.cookies)
        assert text == response.text
        assert dummy_data == response.content
Пример #2
0
 def test_should_validate_attributes(self):
     """Check the attrs fields of BaseStaticResponse by name and declared type."""
     expected_types = {
         '_httpx_response': Union[httpx._models.Response, type(None)],
         '_url': str,
         '_text': str,
         '_selector': parsel.Selector,
     }

     # Map each attrs field name to the type recorded on the field.
     declared_types = dict(
         (field.name, field.type) for field in attr.fields(BaseStaticResponse)
     )

     assert_dicts(declared_types, expected_types)
Пример #3
0
    def test_should_correctly_instantiate_class_without_giving_httpx_response(self, tmp_path):
        """Instantiate RobotsAnalyzer with only a user agent and a cache path.

        No http client is passed in; the test expects the analyzer to hold an
        ``httpx.Client`` whose User-Agent header matches the given user agent,
        a ``RobotFileParser`` and two empty mappings.
        """
        user_agent = 'Mozilla/5.0'

        analyzer = RobotsAnalyzer(user_agent=user_agent, robots_cache=tmp_path)

        assert user_agent == analyzer._user_agent
        assert tmp_path == analyzer._robots_cache

        client = analyzer._http_client
        assert isinstance(client, httpx.Client)
        assert user_agent == client.headers['User-Agent']

        assert isinstance(analyzer._robots_parser, RobotFileParser)
        assert_dicts(analyzer._robots_mapping, {})
        assert_dicts(analyzer._delay_mapping, {})
Пример #4
0
    def test_should_validate_properties_when_passing_httpx_response(self):
        """Wrap a hand-built httpx response and check the exposed properties."""
        url = 'http://foobar.com'
        raw_headers = {'foo': 'bar', 'set-cookie': 'name=John'}
        body = b'hello world'
        httpx_response = httpx.Response(
            200,
            request=httpx.Request('GET', url),
            headers=raw_headers,
            content=body,
        )

        response = BaseStaticResponse(httpx_response=httpx_response)

        # The test expects a content-length entry on top of the headers passed in.
        expected_headers = dict(raw_headers)
        expected_headers['content-length'] = '11'

        assert url == response.url
        assert_dicts(expected_headers, response.headers)
        assert_dicts({'name': 'John'}, response.cookies)
        assert body == response.content
        assert body.decode() == response.text
Пример #5
0
    def test_should_return_non_empty_dict_when_scalpel_attributes_found(self):
        """Check what ``_scalpel_attributes`` extracts from a 'scalpel' section.

        Of the five keys supplied, the test expects only ``min_request_delay``
        and ``USER_AGENT`` (returned as ``user_agent``) in the result.
        """
        scalpel_section = {
            'min_request_delay': 1,
            'foo': 'bar',
            'USER_AGENT': 'Mozilla/5.0',
            'fruit': 'pineapple',
            '_config': 'foobar',
        }
        document = {'name': 'paul', 'scalpel': scalpel_section}

        found = Configuration._scalpel_attributes(document)

        assert_dicts({'min_request_delay': 1, 'user_agent': 'Mozilla/5.0'}, found)
Пример #6
0
    def test_should_correctly_instantiate_class_with_httpx_response_passed_as_argument(
            self, trio_tmp_path):
        """Instantiate RobotsAnalyzer with an explicitly supplied async client.

        The test expects the analyzer to keep the given ``httpx.AsyncClient``
        type and that client's own User-Agent header ('python-httpx'), not the
        analyzer's user agent.
        """
        user_agent = 'Mozilla/5.0'
        custom_client = httpx.AsyncClient(headers={'User-Agent': 'python-httpx'})

        analyzer = RobotsAnalyzer(
            user_agent=user_agent,
            robots_cache=trio_tmp_path,
            http_client=custom_client,
        )

        assert user_agent == analyzer._user_agent
        assert trio_tmp_path == analyzer._robots_cache

        client = analyzer._http_client
        assert isinstance(client, httpx.AsyncClient)
        assert 'python-httpx' == client.headers['User-Agent']

        assert isinstance(analyzer._robots_parser, RobotFileParser)
        # Both mappings are expected to be empty right after construction.
        for mapping_name in ('_robots_mapping', '_delay_mapping'):
            assert_dicts(getattr(analyzer, mapping_name), {})
Пример #7
0
 def test_should_return_correct_dict_given_correct_input(self, given_dict, expected_dict):
     """Compare ``_get_dict_with_lower_keys(given_dict)`` with ``expected_dict``."""
     lowered = Configuration._get_dict_with_lower_keys(given_dict)

     assert_dicts(expected_dict, lowered)