示例#1
0
    def fetch(self, request):
        '''Start a file download for the given request.

        The steps are performed in this order: the fetch is prepared, the
        file size is queried and stored on the response, a restart is
        attempted when the request carries a truthy ``restart_value``,
        the data stream is opened and finally a ``RETR`` command for
        ``request.file_path`` is used to begin the stream.

        Args:
            request (:class:`.ftp.request.Request`): Request.

        Returns:
            .ftp.request.Response: A Response populated with the initial
            data connection reply.

        If the restart raises :class:`FTPServerError`, the error is logged
        at debug level and the download continues; in that case
        ``restart_value`` is not set on the response.

        Once the response is received, call :meth:`read_content`.

        Coroutine.
        '''
        result = Response()

        yield From(self._prepare_fetch(request, result))

        result.file_transfer_size = yield From(self._fetch_size(request))

        restart_offset = request.restart_value

        if restart_offset:
            try:
                yield From(self._commander.restart(restart_offset))
                result.restart_value = restart_offset
            except FTPServerError:
                # Restarting is best effort; carry on without it.
                _logger.debug('Could not restart file.', exc_info=1)

        yield From(self._open_data_stream())
        yield From(self._begin_stream(Command('RETR', request.file_path)))

        raise Return(result)
示例#2
0
    def fetch(self, request):
        '''Begin retrieving the file named by the request.

        Args:
            request (:class:`.ftp.request.Request`): Request.

        Returns:
            .ftp.request.Response: A Response populated with the initial
            data connection reply.

        The response's ``file_transfer_size`` is filled in from
        :meth:`_fetch_size`. When ``request.restart_value`` is truthy a
        restart is attempted; a :class:`FTPServerError` from that attempt
        is only logged at debug level.

        Once the response is received, call :meth:`read_content`.

        Coroutine.
        '''
        response = Response()
        yield From(self._prepare_fetch(request, response))

        transfer_size = yield From(self._fetch_size(request))
        response.file_transfer_size = transfer_size

        if request.restart_value:
            try:
                yield From(self._commander.restart(request.restart_value))
                response.restart_value = request.restart_value
            except FTPServerError:
                # The download proceeds even when the restart is refused.
                _logger.debug('Could not restart file.', exc_info=1)

        yield From(self._open_data_stream())

        retrieve_command = Command('RETR', request.file_path)
        yield From(self._begin_stream(retrieve_command))

        raise Return(response)
示例#3
0
    def test_to_dict(self):
        '''Check ``to_dict()`` output of a Request and of a Response.

        The Response is given the Request and a Reply with code 200 and
        text 'Success' before being converted.
        '''
        request = Request('ftp://foofle.com')
        request_info = request.to_dict()

        self.assertEqual('ftp://foofle.com', request_info['url'])
        self.assertEqual('ftp', request_info['protocol'])

        response = Response()
        response.request = request
        response.reply = Reply(code=200, text='Success')
        response_info = response.to_dict()

        nested_request = response_info['request']
        self.assertEqual('ftp://foofle.com', nested_request['url'])
        self.assertEqual('ftp', response_info['protocol'])

        # The reply values are expected both nested and at the top level.
        nested_reply = response_info['reply']
        self.assertEqual(200, nested_reply['code'])
        self.assertEqual(200, response_info['response_code'])
        self.assertEqual('Success', nested_reply['text'])
        self.assertEqual('Success', response_info['response_message'])
示例#4
0
    def test_to_dict(self):
        '''Verify the dict form of a Request and of its Response.'''
        ftp_request = Request('ftp://foofle.com')
        as_dict = ftp_request.to_dict()

        self.assertEqual('ftp://foofle.com', as_dict['url'])
        self.assertEqual('ftp', as_dict['protocol'])

        ftp_response = Response()
        ftp_response.request = ftp_request
        ftp_response.reply = Reply(code=200, text='Success')
        as_dict = ftp_response.to_dict()

        # Request details are nested under the 'request' key.
        self.assertEqual('ftp://foofle.com', as_dict['request']['url'])
        self.assertEqual('ftp', as_dict['protocol'])

        # Reply code and text appear under 'reply' and as top-level keys.
        reply_part = as_dict['reply']
        self.assertEqual(200, reply_part['code'])
        self.assertEqual(200, as_dict['response_code'])
        self.assertEqual('Success', reply_part['text'])
        self.assertEqual('Success', as_dict['response_message'])
示例#5
0
    def test_to_dict_body(self):
        '''Check the 'body' entry of ``Response.to_dict()``.

        It must be truthy when the body is a ``Body`` instance and falsy
        when the body is ``NotImplemented``.
        '''
        with_body = Response()
        with_body.body = Body()

        self.assertTrue(with_body.to_dict()['body'])
        with_body.body.close()

        placeholder_body = Response()
        placeholder_body.body = NotImplemented

        self.assertFalse(placeholder_body.to_dict()['body'])
示例#6
0
    def test_to_dict_body(self):
        '''Verify how the response body is reported by ``to_dict()``.'''
        # A real Body gives a truthy 'body' entry.
        real = Response()
        real.body = Body()
        real_info = real.to_dict()
        self.assertTrue(real_info['body'])
        real.body.close()

        # A NotImplemented body gives a falsy 'body' entry.
        stub = Response()
        stub.body = NotImplemented
        stub_info = stub.to_dict()
        self.assertFalse(stub_info['body'])
示例#7
0
    def test_warc_recorder_ftp(self):
        '''Record a scripted FTP session and inspect the WARC output.

        A recorder created with ``compress=False`` is driven through one
        control/data session. The written file is then searched for the
        expected record headers, software versions, payload and control
        conversation lines, validated, and finally validated again after
        one byte has been overwritten to make sure validation fails.
        '''
        file_prefix = 'asdf'
        warc_filename = 'asdf.warc'

        recorder_params = WARCRecorderParams(compress=False)
        warc_recorder = WARCRecorder(file_prefix, params=recorder_params)

        request = FTPRequest('ftp://example.com/example.txt')
        request.address = ('0.0.0.0', 80)

        response = FTPResponse()
        response.reply = FTPReply(200, 'OK')
        response.body = Body()
        response.data_address = ('0.0.0.0', 12345)

        with wpull.util.reset_file_offset(response.body):
            response.body.write(b'KITTEH DOGE')

        with warc_recorder.session() as session:
            session.begin_control(request)
            session.request_control_data(b'GIMMEH example.txt')
            session.response_control_data(b'200 OK, no need to yell.')
            session.pre_response(response)
            session.response_data(b'KITTEH DOGE')
            session.response(response)
            session.end_control(response)

        warc_recorder.close()

        with open(warc_filename, 'rb') as warc_file:
            content = warc_file.read()

        self.assertTrue(content.startswith(b'WARC/1.0'))

        for fragment in (
                b'WARC-Type: warcinfo\r\n',
                b'Content-Type: application/warc-fields',
                b'WARC-Date: ',
                b'WARC-Record-ID: <urn:uuid:',
                b'WARC-Block-Digest: sha1:',
        ):
            self.assertIn(fragment, content)

        self.assertNotIn(b'WARC-Payload-Digest: sha1:', content)

        for fragment in (
                b'WARC-Type: resource\r\n',
                b'WARC-Target-URI: ftp://',
                b'Content-Type: application/octet-stream',
                b'WARC-Type: metadata',
                b'WARC-Concurrent-To: <urn:uuid:',
                b'Content-Type: text/x-ftp-control-conversation',
        ):
            self.assertIn(fragment, content)

        wpull_tag = 'Wpull/{0}'.format(wpull.version.__version__)
        self.assertIn(wpull_tag.encode('utf-8'), content)

        python_tag = 'Python/{0}'.format(wpull.util.python_version())
        self.assertIn(python_tag.encode('utf-8'), content)

        for fragment in (
                b'KITTEH DOGE',
                b'* Opening control connection to',
                b'* Kept control connection to',
                b'* Opened data connection to ',
                b'* Closed data connection to ',
                b'> GIMMEH example.txt',
                b'< 200 OK, no need to yell.',
        ):
            self.assertIn(fragment, content)

        # Ignore Concurrent Record ID not seen yet
        self.validate_warc(warc_filename, ignore_minor_error=True)

        # Intentionally modify the contents
        with open(warc_filename, 'r+b') as warc_file:
            warc_file.seek(355)
            warc_file.write(b'f')

        # Sanity check that it actually raises error on bad digest
        with self.assertRaises(Exception):
            self.validate_warc(warc_filename, ignore_minor_error=True)
示例#8
0
    def test_warc_recorder_ftp(self):
        '''Exercise the WARC recorder with a fake FTP exchange.

        The session feeds control and data traffic to the recorder, after
        which the file ``asdf.warc`` is read back and checked for the
        expected headers and content. The file must validate as written
        and must fail validation once a byte has been overwritten.
        '''
        file_prefix = 'asdf'
        warc_filename = 'asdf.warc'

        warc_recorder = WARCRecorder(
            file_prefix, params=WARCRecorderParams(compress=False))

        request = FTPRequest('ftp://example.com/example.txt')
        request.address = ('0.0.0.0', 80)
        response = FTPResponse()
        response.reply = FTPReply(200, 'OK')
        response.body = Body()
        response.data_address = ('0.0.0.0', 12345)

        with wpull.util.reset_file_offset(response.body):
            response.body.write(b'KITTEH DOGE')

        with warc_recorder.session() as recording:
            recording.begin_control(request)
            recording.request_control_data(b'GIMMEH example.txt')
            recording.response_control_data(b'200 OK, no need to yell.')
            recording.pre_response(response)
            recording.response_data(b'KITTEH DOGE')
            recording.response(response)
            recording.end_control(response)

        warc_recorder.close()

        with open(warc_filename, 'rb') as in_file:
            warc_bytes = in_file.read()

        def expect(fragment):
            # Shorthand for asserting that the file contains the bytes.
            self.assertIn(fragment, warc_bytes)

        self.assertTrue(warc_bytes.startswith(b'WARC/1.0'))

        # Headers of the warcinfo record.
        expect(b'WARC-Type: warcinfo\r\n')
        expect(b'Content-Type: application/warc-fields')
        expect(b'WARC-Date: ')
        expect(b'WARC-Record-ID: <urn:uuid:')
        expect(b'WARC-Block-Digest: sha1:')
        self.assertNotIn(b'WARC-Payload-Digest: sha1:', warc_bytes)

        # Headers of the resource and metadata records.
        expect(b'WARC-Type: resource\r\n')
        expect(b'WARC-Target-URI: ftp://')
        expect(b'Content-Type: application/octet-stream')
        expect(b'WARC-Type: metadata')
        expect(b'WARC-Concurrent-To: <urn:uuid:')
        expect(b'Content-Type: text/x-ftp-control-conversation')

        # Software version strings.
        expect('Wpull/{0}'.format(wpull.version.__version__).encode('utf-8'))
        expect(
            'Python/{0}'.format(wpull.util.python_version()).encode('utf-8'))

        # Payload and control conversation log.
        expect(b'KITTEH DOGE')
        expect(b'* Opening control connection to')
        expect(b'* Kept control connection to')
        expect(b'* Opened data connection to ')
        expect(b'* Closed data connection to ')
        expect(b'> GIMMEH example.txt')
        expect(b'< 200 OK, no need to yell.')

        # Ignore Concurrent Record ID not seen yet
        self.validate_warc(warc_filename, ignore_minor_error=True)

        with open(warc_filename, 'r+b') as in_file:
            # Intentionally modify the contents
            in_file.seek(355)
            in_file.write(b'f')

        with self.assertRaises(Exception):
            # Sanity check that it actually raises error on bad digest
            self.validate_warc(warc_filename, ignore_minor_error=True)