def test_login_no_password_required(self):
    # Only a username is set on the request; no password is supplied.
    ftp_client = Client()
    sink = io.BytesIO()

    with ftp_client.session() as ftp_session:
        login_request = Request(self.get_url('/example (copy).txt'))
        login_request.username = '******'

        yield from ftp_session.start(login_request)
        yield from ftp_session.download(sink)
def test_fetch_file_restart_not_supported(self):
    # Ask to continue from an offset; the response must report no
    # restart value and the body must be the complete file.
    ftp_client = Client()
    sink = io.BytesIO()

    with ftp_client.session() as ftp_session:
        resume_request = Request(self.get_url('/example (copy).txt'))
        resume_request.set_continue(99999)  # Magic value in the test server

        response = yield from ftp_session.start(resume_request)
        self.assertFalse(response.restart_value)

        yield from ftp_session.download(sink)

    expected_body = 'The real treasure is in Smaug’s heart 💗.\n'.encode('utf-8')
    self.assertEqual(expected_body, response.body.content())
def test_fetch_file_restart(self):
    # Ask to continue from offset 10; the response must report restart
    # value 10 and the body must equal the expected remainder.
    ftp_client = Client()
    sink = io.BytesIO()

    with ftp_client.session() as ftp_session:
        resume_request = Request(self.get_url('/example (copy).txt'))
        resume_request.set_continue(10)

        response = yield from ftp_session.start(resume_request)
        self.assertEqual(10, response.restart_value)

        yield from ftp_session.download(sink)

    expected_tail = 'reasure is in Smaug’s heart 💗.\n'.encode('utf-8')
    self.assertEqual(expected_tail, response.body.content())
def test_fetch_file_restart(self):
    # A continuation offset of 10 is requested; both the reported
    # restart value and the downloaded body are checked.
    restart_offset = 10
    downloader = Client()
    output = io.BytesIO()

    with downloader.session() as active_session:
        partial_request = Request(self.get_url('/example (copy).txt'))
        partial_request.set_continue(restart_offset)

        response = yield from active_session.start(partial_request)
        self.assertEqual(restart_offset, response.restart_value)

        yield from active_session.download(output)

    received = response.body.content()
    self.assertEqual(
        'reasure is in Smaug’s heart 💗.\n'.encode('utf-8'),
        received
    )
def test_fetch_file_restart_not_supported(self):
    # The response is expected to carry a falsy restart value, and the
    # whole file content is expected in the body.
    downloader = Client()
    output = io.BytesIO()

    with downloader.session() as active_session:
        partial_request = Request(self.get_url('/example (copy).txt'))
        partial_request.set_continue(99999)  # Magic value in the test server

        response = yield from active_session.start(partial_request)
        self.assertFalse(response.restart_value)

        yield from active_session.download(output)

    received = response.body.content()
    self.assertEqual(
        'The real treasure is in Smaug’s heart 💗.\n'.encode('utf-8'),
        received
    )
def process(self):
    '''Process.

    Creates a request for the item session's URL, consults the fetch
    rule, and then fetches the URL. When globbing is enabled and the
    filename contains glob characters, the request is converted to a
    directory request and the unquoted filename is kept as the glob
    pattern; otherwise the request is classified as file or directory
    first. Sleeps afterwards if the fetch returns a wait time.

    Coroutine.
    '''
    # The same Request object is stored on the item session and kept locally.
    self._item_session.request = request = Request(self._item_session.url_record.url)
    # Only the first element of the rule's result is used as the verdict.
    verdict = self._fetch_rule.check_ftp_request(self._item_session)[0]

    if not verdict:
        self._item_session.skip()
        return

    self._add_request_password(request)

    # dir_name is not used below; only the filename part matters here.
    dir_name, filename = self._item_session.url_record.url_info.split_path()
    # The set intersection is non-empty when any character of the
    # filename is one of GLOB_CHARS.
    if self._processor.fetch_params.glob and frozenset(filename) & GLOB_CHARS:
        request = self._to_directory_request(request)
        is_file = False
        self._glob_pattern = urllib.parse.unquote(filename)
    else:
        is_file = yield from self._prepare_request_file_vs_dir(request)

        # NOTE(review): placement inside the else branch was reconstructed
        # from a whitespace-collapsed source -- confirm against the file.
        self._file_writer_session.process_request(request)

    wait_time = yield from self._fetch(request, is_file)

    if wait_time:
        _logger.debug('Sleeping {0}.', wait_time)
        yield from asyncio.sleep(wait_time)
def _prepare_fetch(self, request: Request, response: Response):
    '''Prepare for a fetch.

    Stores the request and response on the session, initialises the
    stream, reconnects the control connection if it was closed, emits
    the ``begin_control`` event, and logs in.

    Coroutine.
    '''
    self._request, self._response = request, response

    yield from self._init_stream()

    was_closed = self._control_connection.closed()

    if was_closed:
        # Remove this connection's entry from the login table before
        # reconnecting.
        self._login_table.pop(self._control_connection, None)
        yield from self._control_stream.reconnect()

    request.address = self._control_connection.address

    self.event_dispatcher.notify(
        self.Event.begin_control,
        request,
        connection_reused=not was_closed,
    )

    if was_closed:
        # The welcome message is read only after a reconnect.
        yield from self._commander.read_welcome_message()

    yield from self._log_in()

    self._response.request = request
def _prepare_fetch(self, request: Request, response: Response):
    """Prepare for a fetch.

    Remembers the request/response pair, sets up the stream, brings the
    control connection back up when it is closed, notifies listeners
    that the control session begins, and performs the login.

    Coroutine.
    """
    self._request = request
    self._response = response

    yield from self._init_stream()

    if self._control_connection.closed():
        connection_reused = False
        self._login_table.pop(self._control_connection, None)
        yield from self._control_stream.reconnect()
    else:
        connection_reused = True

    request.address = self._control_connection.address

    begin_event = self.Event.begin_control
    self.event_dispatcher.notify(
        begin_event, request, connection_reused=connection_reused)

    if not connection_reused:
        # A reconnect happened above, so read the welcome message.
        yield from self._commander.read_welcome_message()

    yield from self._log_in()

    self._response.request = request
def test_duration_timeout(self):
    # A download with duration_timeout=0.1 is expected to raise
    # DurationTimeout.
    ftp_client = Client()
    sink = io.BytesIO()

    with self.assertRaises(DurationTimeout):
        with ftp_client.session() as ftp_session:
            slow_request = Request(self.get_url('/hidden/sleep.txt'))
            yield from ftp_session.start(slow_request)
            yield from ftp_session.download(sink, duration_timeout=0.1)
def test_to_dict(self):
    # Check the dictionary forms of a request and of a response that
    # carries that request plus a reply.
    url = 'ftp://foofle.com'

    request = Request(url)
    request_fields = request.to_dict()

    self.assertEqual(url, request_fields['url'])
    self.assertEqual('ftp', request_fields['protocol'])

    response = Response()
    response.request = request
    response.reply = Reply(code=200, text='Success')
    response_fields = response.to_dict()

    self.assertEqual(url, response_fields['request']['url'])
    self.assertEqual('ftp', response_fields['protocol'])

    reply_fields = response_fields['reply']
    self.assertEqual(200, reply_fields['code'])
    self.assertEqual(200, response_fields['response_code'])
    self.assertEqual('Success', reply_fields['text'])
    self.assertEqual('Success', response_fields['response_message'])
def test_fetch_file(self):
    # Download a file and compare the response body to the expected
    # UTF-8 encoded text.
    ftp_client = Client()
    sink = io.BytesIO()

    with ftp_client.session() as ftp_session:
        file_request = Request(self.get_url('/example (copy).txt'))
        response = yield from ftp_session.start(file_request)
        yield from ftp_session.download(sink)

    expected_body = 'The real treasure is in Smaug’s heart 💗.\n'.encode('utf-8')
    self.assertEqual(expected_body, response.body.content())
def test_fetch_no_file(self):
    # Fetching '/asdf.txt' must raise FTPServerError with reply code 550.
    client = Client()
    file = io.BytesIO()

    with client.session() as session:
        # assertRaises replaces the manual try/except/else + self.fail():
        # it matches the other tests in this file and reports a
        # descriptive message when no exception is raised.
        with self.assertRaises(FTPServerError) as raised:
            yield from \
                session.start(Request(self.get_url('/asdf.txt')))
            yield from session.download(file)

        self.assertEqual(550, raised.exception.reply_code)
def test_fetch_listing(self):
    # List the root directory and check the number, order and names of
    # the returned entries.
    ftp_client = Client()
    sink = io.BytesIO()

    with ftp_client.session() as ftp_session:
        listing_request = Request(self.get_url('/'))
        response = yield from ftp_session.start_listing(listing_request)
        yield from ftp_session.download_listing(sink)

    print(response.body.content())

    expected_names = (
        'junk',
        'example1',
        'example2💎',
        'example (copy).txt',
        'readme.txt',
    )

    self.assertEqual(5, len(response.files))

    for position, expected_name in enumerate(expected_names):
        self.assertEqual(expected_name, response.files[position].name)
def test_fetch_bad_pasv_addr(self):
    # The session's login step is wrapped so that the test command
    # EVIL_BAD_PASV_ADDR is sent right after logging in; starting the
    # fetch is then expected to raise ProtocolError.
    ftp_client = Client()
    sink = io.BytesIO()

    with ftp_client.session() as ftp_session:
        real_log_in = ftp_session._log_in

        @asyncio.coroutine
        def log_in_then_send_evil_command():
            yield from real_log_in()
            evil_command = Command('EVIL_BAD_PASV_ADDR')
            yield from ftp_session._control_stream.write_command(
                evil_command)
            print('Evil awaits')

        # TODO: should probably have a way of sending custom commands
        ftp_session._log_in = log_in_then_send_evil_command

        with self.assertRaises(ProtocolError):
            yield from ftp_session.start(
                Request(self.get_url('/example (copy).txt')))
def _prepare_request_file_vs_dir(self, request: Request) -> bool:
    '''Check if file, modify request, and return whether is a file.

    The decision is taken from the URL record's link type when one is
    set, otherwise from a trailing slash on the request path, otherwise
    from the entry found in the parent listing. When the parent listing
    shows a directory, a slash is appended to the request URL.

    Coroutine.
    '''
    # is_file is tri-state here: True, False, or the string 'unknown'.
    if self._item_session.url_record.link_type:
        is_file = self._item_session.url_record.link_type == LinkType.file
    elif request.url_info.path.endswith('/'):
        is_file = False
    else:
        is_file = 'unknown'

    if is_file == 'unknown':
        files = yield from self._fetch_parent_path(request)

        # No listing entries available: treat the request as a file.
        if not files:
            return True

        filename = posixpath.basename(request.file_path)

        for file_entry in files:
            if file_entry.name == filename:
                _logger.debug('Found entry in parent. Type {}',
                              file_entry.type)
                # Any entry type other than 'dir' counts as a file.
                is_file = file_entry.type != 'dir'
                break
        else:
            # The loop finished without a break: no entry matched.
            _logger.debug('Did not find entry. Assume file.')
            return True

        # NOTE(review): nesting of this block under the 'unknown' branch
        # was reconstructed from a whitespace-collapsed source -- confirm.
        if not is_file:
            request.url = append_slash_to_path_url(request.url_info)
            _logger.debug('Request URL changed to {}. Path={}.',
                          request.url, request.file_path)

    return is_file
def test_warc_recorder_ftp(self):
    # Record one synthetic FTP exchange into an uncompressed WARC file,
    # check the written bytes for the expected fragments, validate the
    # file, then corrupt one byte and expect validation to fail.
    file_prefix = 'asdf'
    warc_filename = 'asdf.warc'

    recorder_params = WARCRecorderParams(compress=False)
    warc_recorder = WARCRecorder(file_prefix, params=recorder_params)

    request = FTPRequest('ftp://example.com/example.txt')
    request.address = ('0.0.0.0', 80)

    response = FTPResponse()
    response.reply = FTPReply(200, 'OK')
    response.body = Body()
    response.data_address = ('0.0.0.0', 12345)

    with wpull.util.reset_file_offset(response.body):
        response.body.write(b'KITTEH DOGE')

    session = warc_recorder.new_ftp_recorder_session()
    session.begin_control(request)
    session.control_send_data(b'GIMMEH example.txt')
    session.control_receive_data(b'200 OK, no need to yell.')
    session.begin_transfer(response)
    session.transfer_receive_data(b'KITTEH DOGE')
    session.end_transfer(response)
    session.end_control(response)
    session.close()

    warc_recorder.close()

    with open(warc_filename, 'rb') as in_file:
        warc_file_content = in_file.read()

    self.assertTrue(warc_file_content.startswith(b'WARC/1.0'))

    leading_fragments = (
        b'WARC-Type: warcinfo\r\n',
        b'Content-Type: application/warc-fields',
        b'WARC-Date: ',
        b'WARC-Record-ID: <urn:uuid:',
        b'WARC-Block-Digest: sha1:',
    )
    for fragment in leading_fragments:
        self.assertIn(fragment, warc_file_content)

    self.assertNotIn(b'WARC-Payload-Digest: sha1:', warc_file_content)

    remaining_fragments = (
        b'WARC-Type: resource\r\n',
        b'WARC-Target-URI: ftp://',
        b'Content-Type: application/octet-stream',
        b'WARC-Type: metadata',
        b'WARC-Concurrent-To: <urn:uuid:',
        b'Content-Type: text/x-ftp-control-conversation',
        'Wpull/{0}'.format(wpull.version.__version__).encode('utf-8'),
        'Python/{0}'.format(wpull.util.python_version()).encode('utf-8'),
        b'KITTEH DOGE',
        b'* Opening control connection to',
        b'* Kept control connection to',
        b'* Opened data connection to ',
        b'* Closed data connection to ',
        b'> GIMMEH example.txt',
        b'< 200 OK, no need to yell.',
    )
    for fragment in remaining_fragments:
        self.assertIn(fragment, warc_file_content)

    # Ignore Concurrent Record ID not seen yet
    self.validate_warc(warc_filename, ignore_minor_error=True)

    with open(warc_filename, 'r+b') as in_file:
        # Intentionally modify the contents
        in_file.seek(355)
        in_file.write(b'f')

    with self.assertRaises(Exception):
        # Sanity check that it actually raises error on bad digest
        self.validate_warc(warc_filename, ignore_minor_error=True)
def _add_request_password(self, request: Request):
    '''Copy the fetch rule's FTP login onto the request, if one is set.

    The login is unpacked as a ``(username, password)`` pair.
    '''
    if not self._fetch_rule.ftp_login:
        return

    username, password = self._fetch_rule.ftp_login
    request.username = username
    request.password = password
def test_warc_recorder_ftp(self):
    # Drive an FTP recorder session by hand, then inspect the WARC file
    # it produced and confirm that tampering breaks validation.
    file_prefix = 'asdf'
    warc_filename = 'asdf.warc'

    warc_recorder = WARCRecorder(
        file_prefix,
        params=WARCRecorderParams(compress=False),
    )

    ftp_request = FTPRequest('ftp://example.com/example.txt')
    ftp_request.address = ('0.0.0.0', 80)

    ftp_response = FTPResponse()
    ftp_response.reply = FTPReply(200, 'OK')
    ftp_response.body = Body()
    ftp_response.data_address = ('0.0.0.0', 12345)

    with wpull.util.reset_file_offset(ftp_response.body):
        ftp_response.body.write(b'KITTEH DOGE')

    recorder_session = warc_recorder.new_ftp_recorder_session()
    recorder_session.begin_control(ftp_request)
    recorder_session.control_send_data(b'GIMMEH example.txt')
    recorder_session.control_receive_data(b'200 OK, no need to yell.')
    recorder_session.begin_transfer(ftp_response)
    recorder_session.transfer_receive_data(b'KITTEH DOGE')
    recorder_session.end_transfer(ftp_response)
    recorder_session.end_control(ftp_response)
    recorder_session.close()

    warc_recorder.close()

    with open(warc_filename, 'rb') as warc_file:
        warc_file_content = warc_file.read()

    def expect_present(fragment):
        # Shorthand for "this byte string occurs in the WARC file".
        self.assertIn(fragment, warc_file_content)

    self.assertTrue(warc_file_content.startswith(b'WARC/1.0'))

    expect_present(b'WARC-Type: warcinfo\r\n')
    expect_present(b'Content-Type: application/warc-fields')
    expect_present(b'WARC-Date: ')
    expect_present(b'WARC-Record-ID: <urn:uuid:')
    expect_present(b'WARC-Block-Digest: sha1:')
    self.assertNotIn(b'WARC-Payload-Digest: sha1:', warc_file_content)
    expect_present(b'WARC-Type: resource\r\n')
    expect_present(b'WARC-Target-URI: ftp://')
    expect_present(b'Content-Type: application/octet-stream')
    expect_present(b'WARC-Type: metadata')
    expect_present(b'WARC-Concurrent-To: <urn:uuid:')
    expect_present(b'Content-Type: text/x-ftp-control-conversation')

    wpull_tag = 'Wpull/{0}'.format(wpull.version.__version__)
    expect_present(wpull_tag.encode('utf-8'))

    python_tag = 'Python/{0}'.format(wpull.util.python_version())
    expect_present(python_tag.encode('utf-8'))

    expect_present(b'KITTEH DOGE')
    expect_present(b'* Opening control connection to')
    expect_present(b'* Kept control connection to')
    expect_present(b'* Opened data connection to ')
    expect_present(b'* Closed data connection to ')
    expect_present(b'> GIMMEH example.txt')
    expect_present(b'< 200 OK, no need to yell.')

    # Ignore Concurrent Record ID not seen yet
    self.validate_warc(warc_filename, ignore_minor_error=True)

    with open(warc_filename, 'r+b') as warc_file:
        # Intentionally modify the contents
        warc_file.seek(355)
        warc_file.write(b'f')

    with self.assertRaises(Exception):
        # Sanity check that it actually raises error on bad digest
        self.validate_warc(warc_filename, ignore_minor_error=True)