示例#1
0
    def test__make_request(self):
        """Check which mocked responses ``_make_request`` passes through.

        ``Crawler.requests`` is patched and ``get`` is configured to return a
        single mock response (presumably what ``_make_request`` calls to
        fetch the page -- confirm against the implementation). That response
        is mutated between assertions to cover three cases:

        * non-200 status            -> ``None``
        * 200 with non-HTML content -> ``None``
        * 200 with ``text/html``    -> the value of ``response.text``
        """
        c = Crawler("http://test.com/")

        with mock.patch("Crawler.requests") as mock_requests:
            mock_response = mock.Mock()
            mock_requests.get.return_value = mock_response
            # Stand-in body: the final assertion only checks that this
            # exact value comes back.
            mock_response.text = True

            # Make sure it ignores non-200 responses
            mock_response.status_code = 404
            self.assertIsNone(c._make_request(""))

            mock_response.status_code = 200

            # Make sure it ignores non-html responses
            mock_response.headers = {
                "content-type": "text/javascript",
            }
            self.assertIsNone(c._make_request(""))

            mock_response.headers = {
                "content-type": "text/html",
            }
            # Make sure a 200 html response yields the response text
            self.assertEqual(c._make_request(""), True)
示例#2
0
    def test__process_next_url_blacklist(self):
        """Run ``_process_next_url`` twice with a queued URL listed in ``bad_urls``.

        ``_make_request`` is stubbed to return ``None`` and ``_process_html``
        is replaced by a mock so the test can confirm it is never invoked.
        """
        blacklisted_url = "http://a.com/a/b/c/"

        crawler = Crawler("http://a.com")
        crawler.bad_urls = {blacklisted_url: True}
        crawler.process_q.append(blacklisted_url)

        crawler._make_request = mock.Mock(return_value=None)
        html_handler = mock.Mock()
        crawler._process_html = html_handler

        # Expected (queue length, bad_urls length) after each call:
        # first call  -> one entry queued, bad_urls grown to two;
        # second call -> queue drained, bad_urls unchanged.
        expected_sizes = (
            (1, 2),
            (0, 2),
        )
        for queue_size, bad_size in expected_sizes:
            crawler._process_next_url()
            self.assertEqual(len(crawler.process_q), queue_size)
            self.assertEqual(len(crawler.bad_urls), bad_size)

        # The HTML handler must not have been reached at any point.
        self.assertEqual(html_handler.call_count, 0)