예제 #1
0
    def test_missing_colon(self):
        '''A line without a colon fails a default parse but not strict=False.'''
        malformed_text = 'text:hello\nhi\n'

        # Default parse: the test expects ValueError for this input.
        strict_record = NameValueRecord()

        with self.assertRaises(ValueError):
            strict_record.parse(malformed_text)

        # Same input on a fresh record, this time with strict=False.
        lenient_record = NameValueRecord()
        lenient_record.parse(malformed_text, strict=False)

        self.assertEqual('hello', lenient_record['text'])
        self.assertNotIn('hi', lenient_record)
예제 #2
0
 def test_name_value_str_format(self):
     '''str() of the record parsed from RECORD_STR_1 matches the CRLF text.'''
     expected_text = (
         'Entry:\r\n'
         'Who: Gilbert, W.S. | Sullivan, Arthur\r\n'
         'What: The Yeomen of the Guard\r\n'
         'When/Created: 1888\r\n'
     )

     parsed = NameValueRecord()
     parsed.parse(self.RECORD_STR_1)

     self.assertEqual(expected_text, str(parsed))
예제 #3
0
    def test_name_value_fallback(self):
        '''KOI8-R bytes parsed by a default record come back as latin1 text.'''
        koi8_bytes = 'Name: Кракозябры'.encode('koi8-r')

        fallback_record = NameValueRecord()
        fallback_record.parse(koi8_bytes)

        # Expected value: the same KOI8-R bytes reinterpreted as latin1.
        expected_value = 'Кракозябры'.encode('koi8-r').decode('latin1')

        self.assertEqual(expected_value, fallback_record['name'])
예제 #4
0
 def __init__(self, version, status_code, status_reason):
     '''Store the status line values and start with empty fields and body.

     Args:
         version: Stored as-is on ``self.version``.
         status_code: Stored as-is on ``self.status_code``.
         status_reason: Stored as-is on ``self.status_reason``.
     '''
     self.version = version
     self.status_code = status_code
     self.status_reason = status_reason
     # Fresh, empty containers for each instance.
     self.fields = NameValueRecord()
     self.body = Body()
     # Not known at construction time; left unset here.
     self.url_info = None
예제 #5
0
    def _build_phantomjs_coprocessor(cls, session: AppSession,
                                     proxy_port: int):
        '''Create the PhantomJS coprocessor from the session's arguments.

        Args:
            session: Supplies the parsed arguments (``session.args``), the
                object factory (``session.factory``) and the default
                user agent.
            proxy_port: Port placed in the ``--proxy`` argument handed to
                the PhantomJS driver.

        Returns:
            The object produced by
            ``session.factory.new('PhantomJSCoprocessor', ...)``.
        '''
        args = session.args
        factory = session.factory

        header_record = NameValueRecord()

        for header_string in args.header:
            header_record.parse(header_string)

        # PhantomJS only accepts a one-to-one mapping. These defaults are
        # added after the user's headers because NameValueRecord.items()
        # will use only the first value added for each key.
        header_record.add('Accept-Language', '*')

        if not args.http_compression:
            header_record.add('Accept-Encoding', 'identity')

        custom_headers = dict(header_record.items())

        page_settings = {}

        if args.read_timeout:
            # The timeout argument is multiplied by 1000 for PhantomJS.
            page_settings['resourceTimeout'] = args.read_timeout * 1000

        page_settings['userAgent'] = (
            args.user_agent or session.default_user_agent
        )

        # Test early for executable
        wpull.driver.phantomjs.get_version(args.phantomjs_exe)

        phantomjs_params = PhantomJSParams(
            wait_time=args.phantomjs_wait,
            num_scrolls=args.phantomjs_scroll,
            smart_scroll=args.phantomjs_smart_scroll,
            snapshot=args.phantomjs_snapshot,
            custom_headers=custom_headers,
            page_settings=page_settings,
            load_time=args.phantomjs_max_time,
        )

        proxy_address = '{}:{}'.format(args.proxy_server_address, proxy_port)
        extra_args = ['--proxy', proxy_address, '--ignore-ssl-errors=true']

        driver_factory = functools.partial(
            factory.class_map['PhantomJSDriver'],
            exe_path=args.phantomjs_exe,
            extra_args=extra_args,
        )

        return factory.new(
            'PhantomJSCoprocessor',
            driver_factory,
            factory['ProcessingRule'],
            phantomjs_params,
            root_path=args.directory_prefix,
            warc_recorder=factory.get('WARCRecorder'),
        )
예제 #6
0
    def test_name_value_encoding(self):
        '''A record created with encoding='koi8-r' decodes KOI8-R bytes.'''
        koi8_bytes = 'Name: Кракозябры'.encode('koi8-r')

        koi8_record = NameValueRecord(encoding='koi8-r')
        koi8_record.parse(koi8_bytes)

        self.assertEqual('Кракозябры', koi8_record['name'])
예제 #7
0
 def __init__(self, method, resource_url, version='HTTP/1.1'):
     '''Store the request line values and start with empty fields and body.

     Args:
         method: Stored as-is on ``self.method``.
         resource_url: Stored as-is on ``self.resource_url``.
         version: Stored as-is on ``self.version``; defaults to
             ``'HTTP/1.1'``.
     '''
     self.method = method
     self.resource_url = resource_url
     # Not known at construction time; left unset here.
     self.url_info = None
     self.version = version
     # Fresh, empty containers for each instance.
     self.fields = NameValueRecord()
     self.body = Body()
     # Not known at construction time; left unset here.
     self.address = None
예제 #8
0
 def __init__(self, method=None, resource_path=None, version='HTTP/1.1'):
     '''Store the request line values and create latin-1 encoded fields.

     Args:
         method: Stored as-is on ``self.method``; defaults to None.
         resource_path: Stored as-is on ``self.resource_path``; defaults
             to None.
         version: Stored as-is on ``self.version``; defaults to
             ``'HTTP/1.1'``.
     '''
     super().__init__()
     self.method = method
     self.resource_path = resource_path
     self.version = version
     # The field record and self.encoding below use the same 'latin-1' value.
     self.fields = NameValueRecord(encoding='latin-1')
     # No body is attached at construction time.
     self.body = None
     self.encoding = 'latin-1'
예제 #9
0
    def test_name_value_record_setters(self):
        '''Setting a key makes it readable under differently-cased names.'''
        record = NameValueRecord()

        # Before assignment the key is absent and lookup raises KeyError.
        self.assertNotIn('cache', record)

        with self.assertRaises(KeyError):
            _ = record['cache']

        record['cache'] = 'value1'

        # The value is read back using other capitalisations of the name.
        self.assertEqual('value1', record['CACHE'])
        self.assertEqual(['value1'], record.get_list('Cache'))

        all_pairs = list(record.get_all())
        self.assertEqual([('Cache', 'value1')], all_pairs)
예제 #10
0
    def test_wrap_width(self):
        '''A long value is rendered across continuation lines by str().'''
        expected_text = (
            'Blah: hello hello hello hello\r\n'
            '  hello hello hello \r\n'
            ' hello hello hello \r\n'
        )

        wrapped_record = NameValueRecord(wrap_width=24)
        wrapped_record['blah'] = 'hello ' * 10

        self.assertEqual(expected_text, str(wrapped_record))
예제 #11
0
    def _new_mock_response(self, response, file_path):
        '''Return a copy of the response whose body is the file's content.

        Args:
            response: Response to copy; its fields are only read here.
            file_path: Path opened in binary mode and wrapped in a Body.

        Returns:
            The copy, with its own field record and a Content-Type of
            ``text/html; charset="utf-8"``.
        '''
        duplicate = copy.copy(response)

        # The open file object is handed to Body and is not closed here.
        duplicate.body = Body(open(file_path, 'rb'))

        # Give the copy its own record before overriding Content-Type.
        rebuilt_fields = NameValueRecord()
        duplicate.fields = rebuilt_fields

        for field_name, field_value in response.fields.get_all():
            rebuilt_fields.add(field_name, field_value)

        rebuilt_fields['Content-Type'] = 'text/html; charset="utf-8"'

        return duplicate
예제 #12
0
    def _build_phantomjs_controller(self):
        '''Create the proxy server, the PhantomJS client and its controller.

        Returns:
            The object produced by
            ``self._factory.new('PhantomJSController', ...)``, or None when
            the ``phantomjs`` argument is falsy.
        '''
        args = self._args

        if not args.phantomjs:
            return

        # The proxy listens on an unused port; the client is pointed at it.
        proxy_server = self._factory.new(
            'HTTPProxyServer', self.factory['Client'])
        proxy_socket, proxy_port = tornado.testing.bind_unused_port()
        proxy_server.add_socket(proxy_socket)

        header_record = NameValueRecord()

        for header_string in args.header:
            header_record.parse(header_string)

        # PhantomJS only accepts a one-to-one mapping. These defaults are
        # added after the user's headers because NameValueRecord.items()
        # will use only the first value added for each key.
        header_record.add('Accept-Language', '*')

        if not args.http_compression:
            header_record.add('Accept-Encoding', 'identity')

        header_mapping = dict(header_record.items())

        page_settings = {}

        if args.read_timeout:
            # The timeout argument is multiplied by 1000 for PhantomJS.
            page_settings['resourceTimeout'] = args.read_timeout * 1000

        page_settings['userAgent'] = (
            args.user_agent or self.default_user_agent
        )

        client = self._factory.new(
            'PhantomJSClient',
            'localhost:{0}'.format(proxy_port),
            page_settings=page_settings,
            default_headers=header_mapping,
        )
        client.test_client_exe()

        return self._factory.new(
            'PhantomJSController',
            client,
            wait_time=args.phantomjs_wait,
            num_scrolls=args.phantomjs_scroll,
            warc_recorder=self.factory.get('WARCRecorder'),
            smart_scroll=args.phantomjs_smart_scroll,
        )
예제 #13
0
    def test_mixed_line_ending(self):
        '''Every field in MIXED_LINE_ENDING_STR_1 is parsed to its value.'''
        record = NameValueRecord()
        record.parse(self.MIXED_LINE_ENDING_STR_1)

        # (field name, expected value), checked in this order.
        expected_pairs = (
            ('dog', 'woof'),
            ('cat', 'meow'),
            ('bird', 'tweet'),
            ('mouse', 'squeak'),
            ('cow', 'moo'),
            ('frog', 'croak'),
            ('elephant', 'toot'),
            ('duck', 'quack'),
            ('fish', 'blub'),
            ('seal', 'ow ow ow'),
            ('fox', '???'),
        )

        for field_name, expected_value in expected_pairs:
            self.assertEqual(expected_value, record[field_name])
예제 #14
0
    def __init__(self,
                 status_code=None,
                 reason=None,
                 version='HTTP/1.1',
                 request=None):
        '''Store the status line values and create latin-1 encoded fields.

        Args:
            status_code: Optional status code. When given, it is asserted
                to be an int and ``reason`` is asserted to be not None.
            reason: Stored as-is on ``self.reason``.
            version: Stored as-is on ``self.version``; defaults to
                ``'HTTP/1.1'``.
            request: Stored as-is on ``self.request``.
        '''
        # These checks are assert statements, so they are skipped when
        # Python runs with assertions disabled (-O).
        if status_code is not None:
            assert isinstance(status_code, int), \
                'Expect int, got {}'.format(type(status_code))
            assert reason is not None

        self.status_code = status_code
        self.reason = reason
        self.version = version
        # The field record and self.encoding below use the same 'latin-1' value.
        self.fields = NameValueRecord(encoding='latin-1')
        # No body is attached at construction time.
        self.body = None
        self.request = request
        self.encoding = 'latin-1'
예제 #15
0
    def _new_phantomjs_response(self, response, content):
        '''Return a copy of the response carrying the given text as content.

        Args:
            response: Response to copy; its fields are only read here.
            content: Text written, UTF-8 encoded, into the body's content
                file.

        Returns:
            The copy, with its own field record and a Content-Type of
            ``text/html; charset="utf-8"``.
        '''
        duplicate = copy.copy(response)

        # NOTE(review): copy.copy is shallow, so unless Response customises
        # copying, ``body`` is shared with ``response`` -- confirm intended.
        body = duplicate.body

        # tempfile needed for scripts that need a on-disk filename
        body.content_file = tempfile.SpooledTemporaryFile(max_size=999999999)

        encoded_content = content.encode('utf-8')
        body.content_file.write(encoded_content)
        body.content_file.seek(0)

        # Give the copy its own record before overriding Content-Type.
        duplicate.fields = NameValueRecord()

        for field_name, field_value in response.fields.get_all():
            duplicate.fields.add(field_name, field_value)

        duplicate.fields['Content-Type'] = 'text/html; charset="utf-8"'

        return duplicate
예제 #16
0
    def _populate_warcinfo(self, extra_fields=None):
        '''Fill the Warcinfo record's block with the metadata fields.

        Args:
            extra_fields: Optional iterable of ``(name, value)`` pairs
                added after the built-in fields.
        '''
        self._warcinfo_record.set_common_fields(
            WARCRecord.WARCINFO, WARCRecord.WARC_FIELDS)

        metadata = NameValueRecord()

        software = 'Wpull/{0} Python/{1}'.format(
            wpull.version.__version__, wpull.util.python_version())
        conforms_to = (
            'http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf'
        )

        metadata['Software'] = software
        metadata['format'] = 'WARC File Format 1.0'
        metadata['conformsTo'] = conforms_to

        for name, value in extra_fields or ():
            metadata.add(name, value)

        # The block is the serialized fields followed by one more CRLF.
        block_bytes = bytes(metadata) + b'\r\n'

        self._warcinfo_record.block_file = io.BytesIO(block_bytes)
        self._warcinfo_record.compute_checksum()
예제 #17
0
    def _populate_warcinfo(self, extra_fields=None):
        '''Fill the Warcinfo record's block with the metadata fields.

        Args:
            extra_fields: Optional iterable of ``(name, value)`` pairs
                added after the built-in fields.
        '''
        self._warcinfo_record.set_common_fields(
            WARCRecord.WARCINFO, WARCRecord.WARC_FIELDS)

        metadata = NameValueRecord()

        # A falsy configured software string falls back to the default.
        software = self._params.software_string
        if not software:
            software = self.DEFAULT_SOFTWARE_STRING

        metadata['Software'] = software
        metadata['format'] = 'WARC File Format 1.0'
        metadata['conformsTo'] = (
            'http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf'
        )

        if extra_fields:
            for name, value in extra_fields:
                metadata.add(name, value)

        # The block is the serialized fields followed by one more CRLF.
        serialized = bytes(metadata)
        self._warcinfo_record.block_file = io.BytesIO(serialized + b'\r\n')
        self._warcinfo_record.compute_checksum()
예제 #18
0
    def test_with_normalize_overrides(self):
        '''An overridden name keeps its exact spelling; others do not.'''
        record = NameValueRecord(normalize_overrides=['WARC-Type'])

        def check_single_warc_type(expected_value):
            # The record must hold exactly one 'WARC-Type' entry.
            self.assertIn('WARC-Type', record)
            self.assertEqual(expected_value, record['WARC-Type'])
            self.assertEqual(
                [('WARC-Type', expected_value)], list(record.get_all()))
            self.assertEqual([expected_value], record.get_list('Warc-Type'))
            self.assertEqual(['WARC-Type'], list(record.keys()))

        record.add('WARC-Type', 'warcinfo')
        check_single_warc_type('warcinfo')

        # Assigning under a different capitalisation replaces the value.
        record['Warc-Type'] = 'resource'
        check_single_warc_type('resource')

        # A name outside the overrides list is expected as 'Warc-Blah'.
        record['WARC-Blah'] = 'blah'
        self.assertEqual(['WARC-Type', 'Warc-Blah'], list(record.keys()))
예제 #19
0
 def __init__(self):
     '''Create an empty field record and leave the block file unset.'''
     # Field names listed in NAME_OVERRIDES are passed as normalize overrides.
     self.fields = NameValueRecord(normalize_overrides=self.NAME_OVERRIDES)
     # No block content is attached at construction time.
     self.block_file = None
예제 #20
0
    def test_name_value_utf8(self):
        '''A non-ASCII str value survives parsing unchanged.'''
        source_text = 'Name: dogé'

        parsed = NameValueRecord()
        parsed.parse(source_text)

        self.assertEqual('dogé', parsed['name'])
예제 #21
0
 def test_name_value_record_parsing(self):
     '''Parsing RECORD_STR_1 exposes the "who" field by lowercase name.'''
     parsed = NameValueRecord()
     parsed.parse(self.RECORD_STR_1)

     expected_who = 'Gilbert, W.S. | Sullivan, Arthur'

     self.assertIn('who', parsed)
     self.assertEqual(expected_who, parsed['who'])
예제 #22
0
    def test_copy(self):
        '''Deep-copying a populated record must not raise.'''
        populated = NameValueRecord()
        populated['blah'] = 'hello'

        # Only the absence of an exception is checked; the copy is discarded.
        copy.deepcopy(populated)