Пример #1
0
    def test_parse(self):
        # Table-driven check of the factory's URL splitting. Every case pairs
        # an input URL with the expected (scheme, netloc, host, port, path)
        # tuple returned by _parse_url_args.
        local_ip = '127.0.0.1'
        cases = [
            # Query strings are kept in the path, fragments are dropped and a
            # missing path becomes '/'.
            ('http://127.0.0.1?c=v&c2=v2#fragment', ('http', local_ip, local_ip, 80, '/?c=v&c2=v2')),
            ('http://127.0.0.1/?c=v&c2=v2#fragment', ('http', local_ip, local_ip, 80, '/?c=v&c2=v2')),
            ('http://127.0.0.1/foo?c=v&c2=v2#frag', ('http', local_ip, local_ip, 80, '/foo?c=v&c2=v2')),
            ('http://127.0.0.1:100?c=v&c2=v2#fragment', ('http', local_ip + ':100', local_ip, 100, '/?c=v&c2=v2')),
            ('http://127.0.0.1:100/?c=v&c2=v2#frag', ('http', local_ip + ':100', local_ip, 100, '/?c=v&c2=v2')),
            ('http://127.0.0.1:100/foo?c=v&c2=v2#frag', ('http', local_ip + ':100', local_ip, 100, '/foo?c=v&c2=v2')),

            # Plain http URLs: the port defaults to 80 unless given explicitly.
            ('http://127.0.0.1', ('http', local_ip, local_ip, 80, '/')),
            ('http://127.0.0.1/', ('http', local_ip, local_ip, 80, '/')),
            ('http://127.0.0.1/foo', ('http', local_ip, local_ip, 80, '/foo')),
            ('http://127.0.0.1?param=value', ('http', local_ip, local_ip, 80, '/?param=value')),
            ('http://127.0.0.1/?param=value', ('http', local_ip, local_ip, 80, '/?param=value')),
            ('http://127.0.0.1:12345/foo', ('http', local_ip + ':12345', local_ip, 12345, '/foo')),
            ('http://spam:12345/foo', ('http', 'spam:12345', 'spam', 12345, '/foo')),
            ('http://spam.test.org/foo', ('http', 'spam.test.org', 'spam.test.org', 80, '/foo')),

            # https URLs: the port defaults to 443.
            ('https://127.0.0.1/foo', ('https', local_ip, local_ip, 443, '/foo')),
            ('https://127.0.0.1/?param=value', ('https', local_ip, local_ip, 443, '/?param=value')),
            ('https://127.0.0.1:12345/', ('https', local_ip + ':12345', local_ip, 12345, '/')),

            # A trailing space in the URL must not leak into the result.
            ('http://crawlmitest.org/foo ', ('http', 'crawlmitest.org', 'crawlmitest.org', 80, '/foo')),
            ('http://egg:7890 ', ('http', 'egg:7890', 'egg', 7890, '/')),
        ]

        factory = CrawlmiHTPPClientFactory(Request(url='http://github.com/'))
        for url, expected in cases:
            actual = factory._parse_url_args(url)
            # The URL doubles as the failure message to identify the case.
            self.assertEqual(actual, expected, url)
Пример #2
0
    def test_factory_info(self):
        # Connects a factory to the URL returned by self.get_url('file') and,
        # once its deferred fires, checks the response metadata recorded on
        # the factory (status, HTTP version, message and content-length).
        def _cbFactoryInfo(ignored_result, factory):
            # assertEqual/assertTrue replace the deprecated assertEquals and
            # assert_ aliases, which newer unittest versions no longer ship.
            self.assertEqual(factory.status, 200)
            self.assertTrue(factory.version.startswith('HTTP/'))
            self.assertEqual(factory.message, 'OK')
            self.assertEqual(factory.response_headers['content-length'], '10')

        url = self.get_url('file')
        factory = CrawlmiHTPPClientFactory(Request(url))
        # Only host and port are needed to open the connection.
        _, _, host, port, _ = factory._parse_url_args(url)
        reactor.connectTCP(host, port, factory)
        # Returning the deferred lets the test runner wait for the response.
        return factory.deferred.addCallback(_cbFactoryInfo, factory)
Пример #3
0
 def _clientfactory(*args, **kwargs):
     '''Build a CrawlmiHTPPClientFactory whose deferred yields the body.

     The `timeout` and `download_size` keyword arguments (both default to 0)
     are removed from `kwargs` and handed to the factory; all remaining
     arguments are used to construct the Request. A callback is added to the
     factory's deferred that replaces the result with its `body` attribute.
     '''
     factory_options = {
         'timeout': kwargs.pop('timeout', 0),
         'download_size': kwargs.pop('download_size', 0),
     }
     factory = CrawlmiHTPPClientFactory(Request(*args, **kwargs),
                                        **factory_options)

     def _extract_body(response):
         return response.body

     factory.deferred.addCallback(_extract_body)
     return factory
Пример #4
0
 def download_request(self, request):
     '''Start the HTTP download of `request` and return its deferred.

     The timeout (DOWNLOAD_TIMEOUT, default 180) and the size limit
     (DOWNLOAD_SIZE_LIMIT, default 0) are read from the settings, with the
     request passed to each lookup. The TCP connection is opened to the host
     and port exposed by the factory; the request's `bind_address` meta
     value (None when absent) is used as the local bind address.
     '''
     timeout = self.settings.get_float('DOWNLOAD_TIMEOUT', 180, request)
     size_limit = self.settings.get_int('DOWNLOAD_SIZE_LIMIT', 0, request)
     client_factory = CrawlmiHTPPClientFactory(request, timeout, size_limit)
     reactor.connectTCP(
         client_factory.host,
         client_factory.port,
         client_factory,
         bindAddress=request.meta.get('bind_address'))
     return client_factory.deferred
Пример #5
0
    def test_factory_info(self):
        # Connects a factory to the URL returned by self.get_url('file') and,
        # once its deferred fires, checks the response metadata recorded on
        # the factory (status, HTTP version, message and content-length).
        def _cbFactoryInfo(ignored_result, factory):
            # assertEqual/assertTrue replace the deprecated assertEquals and
            # assert_ aliases, which newer unittest versions no longer ship.
            self.assertEqual(factory.status, 200)
            self.assertTrue(factory.version.startswith('HTTP/'))
            self.assertEqual(factory.message, 'OK')
            self.assertEqual(factory.response_headers['content-length'], '10')

        url = self.get_url('file')
        factory = CrawlmiHTPPClientFactory(Request(url))
        # Only host and port are needed to open the connection.
        _, _, host, port, _ = _parse_url_args(url)
        reactor.connectTCP(host, port, factory)
        # Returning the deferred lets the test runner wait for the response.
        return factory.deferred.addCallback(_cbFactoryInfo, factory)
Пример #6
0
 def test_invalid_status(self):
     # Feeds the client a response whose status code is not a number
     # ('BUG') and expects the factory's deferred to fail with
     # BadHttpHeaderError.
     factory = CrawlmiHTPPClientFactory(Request(url='http://foo/bar'))
     client = CrawlmiHTTPClient()
     client.factory = factory
     client.makeConnection(StringTransport())
     client.headers = Headers()

     response_chunks = (
         'HTTP/1.0 BUG OK\r\n',
         'Hello: World\r\n',
         'Foo: Bar\r\n',
         '\r\n',
     )
     for chunk in response_chunks:
         client.dataReceived(chunk)

     client.handleResponse('')
     return self.assertFailure(factory.deferred, BadHttpHeaderError)
Пример #7
0
 def test_non_standard_line_endings(self):
     # Feeds the client a response whose lines end in a bare '\n' instead
     # of '\r\n' and checks that both headers are still collected.
     client = CrawlmiHTTPClient()
     client.factory = CrawlmiHTPPClientFactory(Request(url='http://foo/bar'))
     client.headers = Headers()

     response_chunks = (
         'HTTP/1.0 200 OK\n',
         'Hello: World\n',
         'Foo: Bar\n',
         '\n',
     )
     for chunk in response_chunks:
         client.dataReceived(chunk)

     expected_headers = Headers({'Hello': ['World'], 'Foo': ['Bar']})
     self.assertEqual(client.headers, expected_headers)
Пример #8
0
    def test_parse(self):
        # Table-driven check of the module-level _parse_url_args helper.
        # Every case pairs an input URL with the expected
        # (scheme, netloc, host, port, path) tuple.
        lip = '127.0.0.1'
        tests = [
            # Query strings are kept in the path, fragments are dropped and a
            # missing path becomes '/'.
            ('http://127.0.0.1?c=v&c2=v2#fragment', ('http', lip, lip, 80, '/?c=v&c2=v2')),
            ('http://127.0.0.1/?c=v&c2=v2#fragment', ('http', lip, lip, 80, '/?c=v&c2=v2')),
            ('http://127.0.0.1/foo?c=v&c2=v2#frag', ('http', lip, lip, 80, '/foo?c=v&c2=v2')),
            ('http://127.0.0.1:100?c=v&c2=v2#fragment', ('http', lip + ':100', lip, 100, '/?c=v&c2=v2')),
            ('http://127.0.0.1:100/?c=v&c2=v2#frag', ('http', lip + ':100', lip, 100, '/?c=v&c2=v2')),
            ('http://127.0.0.1:100/foo?c=v&c2=v2#frag', ('http', lip + ':100', lip, 100, '/foo?c=v&c2=v2')),

            # Plain http URLs: the port defaults to 80 unless given explicitly.
            ('http://127.0.0.1', ('http', lip, lip, 80, '/')),
            ('http://127.0.0.1/', ('http', lip, lip, 80, '/')),
            ('http://127.0.0.1/foo', ('http', lip, lip, 80, '/foo')),
            ('http://127.0.0.1?param=value', ('http', lip, lip, 80, '/?param=value')),
            ('http://127.0.0.1/?param=value', ('http', lip, lip, 80, '/?param=value')),
            ('http://127.0.0.1:12345/foo', ('http', lip + ':12345', lip, 12345, '/foo')),
            ('http://spam:12345/foo', ('http', 'spam:12345', 'spam', 12345, '/foo')),
            ('http://spam.test.org/foo', ('http', 'spam.test.org', 'spam.test.org', 80, '/foo')),

            # https URLs: the port defaults to 443.
            ('https://127.0.0.1/foo', ('https', lip, lip, 443, '/foo')),
            ('https://127.0.0.1/?param=value', ('https', lip, lip, 443, '/?param=value')),
            ('https://127.0.0.1:12345/', ('https', lip + ':12345', lip, 12345, '/')),

            # A trailing space in the URL must not leak into the result.
            ('http://crawlmitest.org/foo ', ('http', 'crawlmitest.org', 'crawlmitest.org', 80, '/foo')),
            ('http://egg:7890 ', ('http', 'egg:7890', 'egg', 7890, '/')),
        ]

        # No factory is built here: _parse_url_args is a module-level function,
        # so the factory instance the test used to create was never used.
        for url, test in tests:
            # The URL doubles as the failure message to identify the case.
            self.assertEqual(_parse_url_args(url), test, url)
Пример #9
0
    def test_early_headers(self):
        # Every scenario builds a factory from a Request and passes it to
        # self._test together with the request text expected for it.
        # NOTE(review): _test is defined outside this view; presumably it
        # compares what the client sends with the expected text - confirm.

        # Basic scenario borrowed from twisted's HTTPPageGetter tests. The
        # expected Content-Length is 9 (the length of the body), not the
        # 12981 supplied in the request headers, and 'Connection: close' is
        # expected although the request does not set it.
        request = Request(url='http://foo/bar',
                          body='some data',
                          headers={
                              'Host': 'example.net',
                              'User-Agent': 'fooble',
                              'Cookie': 'blah blah',
                              'Content-Length': '12981',
                              'Useful': 'value'
                          })
        expected = ('GET /bar HTTP/1.0\r\n'
                    'Content-Length: 9\r\n'
                    'Useful: value\r\n'
                    'Connection: close\r\n'
                    'User-Agent: fooble\r\n'
                    'Host: example.net\r\n'
                    'Cookie: blah blah\r\n'
                    '\r\n'
                    'some data')
        self._test(CrawlmiHTPPClientFactory(request), expected)

        # Minimal request: only the Host header is expected.
        request = Request('http://foo/bar')
        expected = ('GET /bar HTTP/1.0\r\n'
                    'Host: foo\r\n'
                    '\r\n')
        self._test(CrawlmiHTPPClientFactory(request), expected)

        # Simple POST with a body and an explicit content type.
        request = Request(
            method='POST',
            url='http://foo/bar',
            body='name=value',
            headers={'Content-Type': 'application/x-www-form-urlencoded'})
        expected = ('POST /bar HTTP/1.0\r\n'
                    'Host: foo\r\n'
                    'Connection: close\r\n'
                    'Content-Type: application/x-www-form-urlencoded\r\n'
                    'Content-Length: 10\r\n'
                    '\r\n'
                    'name=value')
        self._test(CrawlmiHTPPClientFactory(request), expected)

        # Single and multivalued headers: each value of a multivalued header
        # is expected on its own line. The same text is expected whether the
        # headers are given as a plain dict or as a Headers instance.
        multivalued_expected = ('GET /bar HTTP/1.0\r\n'
                                'Host: foo\r\n'
                                'X-Meta-Multivalued: value1\r\n'
                                'X-Meta-Multivalued: value2\r\n'
                                'X-Meta-Single: single\r\n'
                                '\r\n')

        request = Request(url='http://foo/bar',
                          headers={
                              'X-Meta-Single': 'single',
                              'X-Meta-Multivalued': ['value1', 'value2']
                          })
        self._test(CrawlmiHTPPClientFactory(request), multivalued_expected)

        request = Request(url='http://foo/bar',
                          headers=Headers({
                              'X-Meta-Single': 'single',
                              'X-Meta-Multivalued': ['value1', 'value2']
                          }))
        self._test(CrawlmiHTPPClientFactory(request), multivalued_expected)
Пример #10
0
 def _parse(self, url):
     '''Return (scheme, netloc, host, port, path) of a factory built for `url`.'''
     factory = CrawlmiHTPPClientFactory(Request(url=url))
     return (
         factory.scheme,
         factory.netloc,
         factory.host,
         factory.port,
         factory.path,
     )