Пример #1
0
    def test_login_no_password_required(self):
        # Only a username is set on the request. There are no explicit
        # assertions: the test succeeds when start() and download() finish
        # without raising.
        ftp_client = Client()
        sink = io.BytesIO()

        with ftp_client.session() as ftp_session:
            login_request = Request(self.get_url('/example (copy).txt'))
            login_request.username = '******'

            yield from ftp_session.start(login_request)
            yield from ftp_session.download(sink)
Пример #2
0
    def test_login_no_password_required(self):
        # The request carries a username and no password; completing the
        # start/download sequence without an exception is the pass criterion.
        ftp = Client()
        output = io.BytesIO()

        with ftp.session() as active_session:
            req = Request(self.get_url('/example (copy).txt'))
            req.username = '******'

            yield from active_session.start(req)
            yield from active_session.download(output)
Пример #3
0
    def test_fetch_file_restart_not_supported(self):
        # set_continue() is called with a magic value known to the test
        # server; the response must then carry a falsy restart_value and the
        # body must equal the expected text below.
        ftp_client = Client()
        sink = io.BytesIO()

        with ftp_client.session() as ftp_session:
            resume_request = Request(self.get_url('/example (copy).txt'))
            # Magic value in the test server
            resume_request.set_continue(99999)

            response = yield from ftp_session.start(resume_request)
            self.assertFalse(response.restart_value)

            yield from ftp_session.download(sink)

        expected_text = 'The real treasure is in Smaug’s heart 💗.\n'
        self.assertEqual(expected_text.encode('utf-8'), response.body.content())
Пример #4
0
    def test_fetch_file_restart(self):
        # Request a continued transfer with set_continue(10); the response
        # must report restart_value 10 and the body must equal the expected
        # text below.
        ftp_client = Client()
        sink = io.BytesIO()

        with ftp_client.session() as ftp_session:
            resume_request = Request(self.get_url('/example (copy).txt'))
            resume_request.set_continue(10)

            response = yield from ftp_session.start(resume_request)
            self.assertEqual(10, response.restart_value)

            yield from ftp_session.download(sink)

        expected_text = 'reasure is in Smaug’s heart 💗.\n'
        self.assertEqual(expected_text.encode('utf-8'), response.body.content())
Пример #5
0
    def test_fetch_file_restart(self):
        # A request with set_continue(10) should yield a response whose
        # restart_value is 10 and whose body equals the expected bytes.
        ftp = Client()
        output = io.BytesIO()

        with ftp.session() as active_session:
            req = Request(self.get_url('/example (copy).txt'))
            req.set_continue(10)

            response = yield from active_session.start(req)
            self.assertEqual(10, response.restart_value)

            yield from active_session.download(output)

        expected_bytes = 'reasure is in Smaug’s heart 💗.\n'.encode('utf-8')
        self.assertEqual(expected_bytes, response.body.content())
Пример #6
0
    def test_fetch_file_restart_not_supported(self):
        # With the test server's magic continue value the response is
        # expected to have a falsy restart_value, and the body must equal
        # the expected bytes.
        ftp = Client()
        output = io.BytesIO()

        with ftp.session() as active_session:
            req = Request(self.get_url('/example (copy).txt'))
            # Magic value in the test server
            req.set_continue(99999)

            response = yield from active_session.start(req)
            self.assertFalse(response.restart_value)

            yield from active_session.download(output)

        expected_bytes = 'The real treasure is in Smaug’s heart 💗.\n'.encode(
            'utf-8')
        self.assertEqual(expected_bytes, response.body.content())
Пример #7
0
    def process(self):
        '''Run the FTP fetch for the current item session.

        A request is built from the item's URL record and stored on the
        item session. If the fetch rule's verdict is falsy the item is
        skipped. Otherwise the request is passed to
        ``_add_request_password``, then either converted to a directory
        request (glob enabled and the filename contains glob characters)
        or classified by ``_prepare_request_file_vs_dir`` and handed to the
        file writer session. The request is then fetched, and if the fetch
        returns a truthy wait time the coroutine sleeps for that long.

        Coroutine.
        '''
        item_session = self._item_session
        request = Request(item_session.url_record.url)
        item_session.request = request

        # The verdict is the first element of the rule's result.
        if not self._fetch_rule.check_ftp_request(item_session)[0]:
            item_session.skip()
            return

        self._add_request_password(request)

        _, filename = item_session.url_record.url_info.split_path()
        glob_enabled = self._processor.fetch_params.glob

        if glob_enabled and frozenset(filename) & GLOB_CHARS:
            # The filename holds at least one glob character: fetch the
            # directory instead and keep the unquoted name as the pattern.
            request = self._to_directory_request(request)
            is_file = False
            self._glob_pattern = urllib.parse.unquote(filename)
        else:
            is_file = yield from self._prepare_request_file_vs_dir(request)
            self._file_writer_session.process_request(request)

        delay = yield from self._fetch(request, is_file)

        if delay:
            _logger.debug('Sleeping {0}.', delay)
            yield from asyncio.sleep(delay)
Пример #8
0
    def _prepare_fetch(self, request: Request, response: Response):
        '''Get the control connection ready and log in before a fetch.

        Stores the request and response on the session, initialises the
        stream, reconnects the control stream when its connection is
        closed, records the connection address on the request, emits the
        ``begin_control`` event, reads the welcome message after a
        reconnect, logs in, and finally attaches the request to the stored
        response.

        Coroutine.
        '''
        self._request, self._response = request, response

        yield from self._init_stream()

        needs_reconnect = self._control_connection.closed()

        if needs_reconnect:
            # Forget any login table entry keyed by this connection before
            # reconnecting.
            self._login_table.pop(self._control_connection, None)
            yield from self._control_stream.reconnect()

        request.address = self._control_connection.address

        self.event_dispatcher.notify(
            self.Event.begin_control,
            request,
            connection_reused=not needs_reconnect,
        )

        if needs_reconnect:
            yield from self._commander.read_welcome_message()

        yield from self._log_in()

        self._response.request = request
Пример #9
0
    def _prepare_fetch(self, request: Request, response: Response):
        """Prepare the control channel for fetching ``request``.

        The request and response are remembered on the session and the
        stream is initialised. A closed control connection is reconnected
        (after removing its login table entry) and its welcome message is
        read once the ``begin_control`` event has been dispatched. The
        session then logs in and links the request to the stored response.

        Coroutine.
        """
        self._request, self._response = request, response

        yield from self._init_stream()

        was_closed = self._control_connection.closed()

        if was_closed:
            self._login_table.pop(self._control_connection, None)
            yield from self._control_stream.reconnect()

        request.address = self._control_connection.address

        # The event reports whether the existing connection was reused.
        self.event_dispatcher.notify(
            self.Event.begin_control, request,
            connection_reused=not was_closed
        )

        if was_closed:
            yield from self._commander.read_welcome_message()

        yield from self._log_in()

        self._response.request = request
Пример #10
0
    def test_duration_timeout(self):
        # Starting '/hidden/sleep.txt' and downloading it with
        # duration_timeout=0.1 is expected to raise DurationTimeout.
        ftp_client = Client()
        sink = io.BytesIO()

        with self.assertRaises(DurationTimeout):
            with ftp_client.session() as ftp_session:
                slow_request = Request(self.get_url('/hidden/sleep.txt'))

                yield from ftp_session.start(slow_request)
                yield from ftp_session.download(sink, duration_timeout=0.1)
Пример #11
0
    def test_to_dict(self):
        # Check the dict form of a request, then of a response that carries
        # that request together with a 200 'Success' reply.
        ftp_url = 'ftp://foofle.com'

        request = Request(ftp_url)
        request_dict = request.to_dict()

        self.assertEqual(ftp_url, request_dict['url'])
        self.assertEqual('ftp', request_dict['protocol'])

        response = Response()
        response.request = request
        response.reply = Reply(code=200, text='Success')
        response_dict = response.to_dict()

        self.assertEqual(ftp_url, response_dict['request']['url'])
        self.assertEqual('ftp', response_dict['protocol'])

        # The reply fields appear both nested and flattened.
        reply_dict = response_dict['reply']
        self.assertEqual(200, reply_dict['code'])
        self.assertEqual(200, response_dict['response_code'])
        self.assertEqual('Success', reply_dict['text'])
        self.assertEqual('Success', response_dict['response_message'])
Пример #12
0
    def test_to_dict(self):
        # Request.to_dict() must expose the URL and protocol;
        # Response.to_dict() must expose the nested request, the protocol,
        # and the reply both nested and as response_code/response_message.
        url = 'ftp://foofle.com'
        req = Request(url)
        req_as_dict = req.to_dict()

        self.assertEqual(url, req_as_dict['url'])
        self.assertEqual('ftp', req_as_dict['protocol'])

        resp = Response()
        resp.request = req
        resp.reply = Reply(code=200, text='Success')
        resp_as_dict = resp.to_dict()

        self.assertEqual(url, resp_as_dict['request']['url'])
        self.assertEqual('ftp', resp_as_dict['protocol'])

        nested_reply = resp_as_dict['reply']
        self.assertEqual(200, nested_reply['code'])
        self.assertEqual(200, resp_as_dict['response_code'])
        self.assertEqual('Success', nested_reply['text'])
        self.assertEqual('Success', resp_as_dict['response_message'])
Пример #13
0
    def test_fetch_file(self):
        # Start and download '/example (copy).txt', then compare the
        # response body with the expected UTF-8 text.
        ftp_client = Client()
        sink = io.BytesIO()

        with ftp_client.session() as ftp_session:
            file_request = Request(self.get_url('/example (copy).txt'))
            response = yield from ftp_session.start(file_request)
            yield from ftp_session.download(sink)

        expected_text = 'The real treasure is in Smaug’s heart 💗.\n'
        self.assertEqual(expected_text.encode('utf-8'), response.body.content())
Пример #14
0
    def test_fetch_no_file(self):
        # Fetching '/asdf.txt' must raise FTPServerError with reply code
        # 550; completing without that error fails the test.
        ftp_client = Client()
        sink = io.BytesIO()

        with ftp_client.session() as ftp_session:
            try:
                missing_request = Request(self.get_url('/asdf.txt'))
                yield from ftp_session.start(missing_request)
                yield from ftp_session.download(sink)
            except FTPServerError as server_error:
                self.assertEqual(550, server_error.reply_code)
            else:
                self.fail()  # pragma: no cover
Пример #15
0
    def test_fetch_listing(self):
        # List the root directory and check the number of parsed entries
        # and their names, in order.
        ftp_client = Client()
        sink = io.BytesIO()

        with ftp_client.session() as ftp_session:
            listing_request = Request(self.get_url('/'))
            response = yield from ftp_session.start_listing(listing_request)
            yield from ftp_session.download_listing(sink)

        print(response.body.content())

        expected_names = (
            'junk',
            'example1',
            'example2💎',
            'example (copy).txt',
            'readme.txt',
        )

        self.assertEqual(5, len(response.files))

        for index, expected_name in enumerate(expected_names):
            self.assertEqual(expected_name, response.files[index].name)
Пример #16
0
    def test_fetch_bad_pasv_addr(self):
        # Replace the session's _log_in with a wrapper that logs in as
        # usual and then writes the 'EVIL_BAD_PASV_ADDR' command on the
        # control stream; starting a fetch must then raise ProtocolError.
        ftp_client = Client()
        sink = io.BytesIO()

        with ftp_client.session() as ftp_session:
            real_log_in = ftp_session._log_in

            @asyncio.coroutine
            def log_in_then_send_bad_pasv():
                yield from real_log_in()
                bad_pasv_command = Command('EVIL_BAD_PASV_ADDR')
                yield from ftp_session._control_stream.write_command(
                    bad_pasv_command)
                print('Evil awaits')

            # TODO: should probably have a way of sending custom commands
            ftp_session._log_in = log_in_then_send_bad_pasv

            with self.assertRaises(ProtocolError):
                file_request = Request(self.get_url('/example (copy).txt'))
                yield from ftp_session.start(file_request)
Пример #17
0
    def _prepare_request_file_vs_dir(self, request: Request) -> bool:
        '''Work out whether the request targets a file or a directory.

        The URL record's link type decides when it is set; otherwise a
        path ending in ``/`` counts as a directory. Failing both, the
        parent path is fetched and searched for an entry with the
        request's base name. When that entry is a directory the request
        URL is rewritten with a trailing slash. A missing listing or a
        missing entry is treated as a file.

        Returns:
            bool: True for a file, False for a directory.

        Coroutine.
        '''
        link_type = self._item_session.url_record.link_type

        if link_type:
            return link_type == LinkType.file

        if request.url_info.path.endswith('/'):
            return False

        # Type unknown so far: look the name up in the parent listing.
        files = yield from self._fetch_parent_path(request)

        if not files:
            return True

        filename = posixpath.basename(request.file_path)
        matching_entry = next(
            (entry for entry in files if entry.name == filename), None
        )

        if matching_entry is None:
            _logger.debug('Did not find entry. Assume file.')
            return True

        _logger.debug('Found entry in parent. Type {}',
                      matching_entry.type)

        if matching_entry.type != 'dir':
            return True

        request.url = append_slash_to_path_url(request.url_info)
        _logger.debug('Request URL changed to {}. Path={}.',
                      request.url, request.file_path)
        return False
Пример #18
0
    def test_warc_recorder_ftp(self):
        # Record one scripted FTP control/transfer conversation into an
        # uncompressed WARC file, check that the expected markers are in
        # the file, validate it, then overwrite one byte and expect
        # validation to raise.
        file_prefix = 'asdf'
        warc_filename = 'asdf.warc'

        recorder_params = WARCRecorderParams(compress=False)
        warc_recorder = WARCRecorder(file_prefix, params=recorder_params)

        request = FTPRequest('ftp://example.com/example.txt')
        request.address = ('0.0.0.0', 80)

        response = FTPResponse()
        response.reply = FTPReply(200, 'OK')
        response.body = Body()
        response.data_address = ('0.0.0.0', 12345)

        payload = b'KITTEH DOGE'

        with wpull.util.reset_file_offset(response.body):
            response.body.write(payload)

        session = warc_recorder.new_ftp_recorder_session()
        session.begin_control(request)
        session.control_send_data(b'GIMMEH example.txt')
        session.control_receive_data(b'200 OK, no need to yell.')
        session.begin_transfer(response)
        session.transfer_receive_data(payload)
        session.end_transfer(response)
        session.end_control(response)
        session.close()

        warc_recorder.close()

        with open(warc_filename, 'rb') as warc_file:
            content = warc_file.read()

        self.assertTrue(content.startswith(b'WARC/1.0'))

        for marker in (
                b'WARC-Type: warcinfo\r\n',
                b'Content-Type: application/warc-fields',
                b'WARC-Date: ',
                b'WARC-Record-ID: <urn:uuid:',
                b'WARC-Block-Digest: sha1:',
        ):
            self.assertIn(marker, content)

        self.assertNotIn(b'WARC-Payload-Digest: sha1:', content)

        for marker in (
                b'WARC-Type: resource\r\n',
                b'WARC-Target-URI: ftp://',
                b'Content-Type: application/octet-stream',
                b'WARC-Type: metadata',
                b'WARC-Concurrent-To: <urn:uuid:',
                b'Content-Type: text/x-ftp-control-conversation',
        ):
            self.assertIn(marker, content)

        # Software identification strings.
        wpull_marker = 'Wpull/{0}'.format(wpull.version.__version__)
        self.assertIn(wpull_marker.encode('utf-8'), content)
        python_marker = 'Python/{0}'.format(wpull.util.python_version())
        self.assertIn(python_marker.encode('utf-8'), content)

        # Payload plus the logged control conversation lines.
        for marker in (
                payload,
                b'* Opening control connection to',
                b'* Kept control connection to',
                b'* Opened data connection to ',
                b'* Closed data connection to ',
                b'> GIMMEH example.txt',
                b'< 200 OK, no need to yell.',
        ):
            self.assertIn(marker, content)

        # Ignore Concurrent Record ID not seen yet
        self.validate_warc(warc_filename, ignore_minor_error=True)

        # Intentionally modify the contents
        with open(warc_filename, 'r+b') as warc_file:
            warc_file.seek(355)
            warc_file.write(b'f')

        # Sanity check that it actually raises error on bad digest
        with self.assertRaises(Exception):
            self.validate_warc(warc_filename, ignore_minor_error=True)
Пример #19
0
 def _add_request_password(self, request: Request):
     '''Set the request's username and password from the fetch rule.

     Nothing is changed when the fetch rule's ``ftp_login`` is falsy.
     '''
     login = self._fetch_rule.ftp_login

     if not login:
         return

     request.username, request.password = login
Пример #20
0
    def test_warc_recorder_ftp(self):
        # Drive an FTP recorder session by hand, read back the WARC file it
        # produced, assert on its contents, validate it, and finally check
        # that validation raises once a byte has been overwritten.
        file_prefix = 'asdf'
        warc_filename = 'asdf.warc'

        params = WARCRecorderParams(compress=False)
        recorder = WARCRecorder(file_prefix, params=params)

        ftp_request = FTPRequest('ftp://example.com/example.txt')
        ftp_request.address = ('0.0.0.0', 80)

        ftp_response = FTPResponse()
        ftp_response.reply = FTPReply(200, 'OK')
        ftp_response.body = Body()
        ftp_response.data_address = ('0.0.0.0', 12345)

        body_bytes = b'KITTEH DOGE'

        with wpull.util.reset_file_offset(ftp_response.body):
            ftp_response.body.write(body_bytes)

        recorder_session = recorder.new_ftp_recorder_session()
        recorder_session.begin_control(ftp_request)
        recorder_session.control_send_data(b'GIMMEH example.txt')
        recorder_session.control_receive_data(b'200 OK, no need to yell.')
        recorder_session.begin_transfer(ftp_response)
        recorder_session.transfer_receive_data(body_bytes)
        recorder_session.end_transfer(ftp_response)
        recorder_session.end_control(ftp_response)
        recorder_session.close()

        recorder.close()

        with open(warc_filename, 'rb') as handle:
            warc_bytes = handle.read()

        self.assertTrue(warc_bytes.startswith(b'WARC/1.0'))

        header_markers = [
            b'WARC-Type: warcinfo\r\n',
            b'Content-Type: application/warc-fields',
            b'WARC-Date: ',
            b'WARC-Record-ID: <urn:uuid:',
            b'WARC-Block-Digest: sha1:',
        ]
        for expected in header_markers:
            self.assertIn(expected, warc_bytes)

        self.assertNotIn(b'WARC-Payload-Digest: sha1:', warc_bytes)

        record_markers = [
            b'WARC-Type: resource\r\n',
            b'WARC-Target-URI: ftp://',
            b'Content-Type: application/octet-stream',
            b'WARC-Type: metadata',
            b'WARC-Concurrent-To: <urn:uuid:',
            b'Content-Type: text/x-ftp-control-conversation',
        ]
        for expected in record_markers:
            self.assertIn(expected, warc_bytes)

        wpull_version = 'Wpull/{0}'.format(wpull.version.__version__)
        self.assertIn(wpull_version.encode('utf-8'), warc_bytes)

        python_version = 'Python/{0}'.format(wpull.util.python_version())
        self.assertIn(python_version.encode('utf-8'), warc_bytes)

        conversation_markers = [
            body_bytes,
            b'* Opening control connection to',
            b'* Kept control connection to',
            b'* Opened data connection to ',
            b'* Closed data connection to ',
            b'> GIMMEH example.txt',
            b'< 200 OK, no need to yell.',
        ]
        for expected in conversation_markers:
            self.assertIn(expected, warc_bytes)

        # Ignore Concurrent Record ID not seen yet
        self.validate_warc(warc_filename, ignore_minor_error=True)

        # Intentionally modify the contents
        with open(warc_filename, 'r+b') as handle:
            handle.seek(355)
            handle.write(b'f')

        # Sanity check that it actually raises error on bad digest
        with self.assertRaises(Exception):
            self.validate_warc(warc_filename, ignore_minor_error=True)