def test_exit_on_exception(self):
    """
    Check that exit_on_exception exits and writes the expected text to
    stderr for each kind of error it can be handed.
    """

    def assert_exit_reports(make_error, reported_text):
        # stderr is swapped for a buffer before the error is even built,
        # and the error is genuinely raised so that the handler is
        # invoked from inside an active except clause
        with patch('sys.stderr', new_callable=StringIO) as captured:
            try:
                raise make_error()
            except Exception as caught:
                with self.assertRaises(MyExitError):
                    exit_on_exception(caught)
                self.assertTrue(reported_text in captured.getvalue())

    # 'Bad Request' server error: the message attached to the
    # HttpException is expected on stderr
    assert_exit_reports(
        lambda: exceptions.HttpException(
            "source error message", RuntimeError("Bad Request")),
        "source error message")

    # server error whose original message mentions an expired certificate
    assert_exit_reports(
        lambda: exceptions.HttpException(
            None,
            RuntimeError("this message indicates certificate expired")),
        "Certificate expired")

    # any other server error: the original message is expected on stderr
    assert_exit_reports(
        lambda: exceptions.HttpException(
            None, RuntimeError("other error message")),
        "other error message")

    # error that does not come from the server at all
    assert_exit_reports(
        lambda: RuntimeError("non-server error message"),
        "non-server error message")
def send(self, request, **kwargs):
    """
    Send a given PreparedRequest, retrying on the HTTP errors listed in
    ``self.retry_errors`` and on 'Connection reset by peer' connection
    errors when ``self.retry`` is set.

    Environment settings (proxies, stream, verify, cert) are merged into
    ``kwargs`` and a timeout of 120 is applied when the caller gave none.
    When retrying, the delay between attempts doubles after every attempt
    and is capped at MAX_RETRY_DELAY; on a 'service unavailable' response
    the delay advertised by the server (SERVICE_RETRY header) is used
    instead when it can be parsed as a whole number of seconds.

    :param request: The prepared request to send.
    :param kwargs: Any keywords the adaptor for the request accepts.
    :return: the response
    :rtype: requests.Response
    :raises exceptions.HttpException: for an HTTP error that is not
        retried, for a connection error other than errno 104, or once
        MAX_NUM_RETRIES attempts have failed.
    """
    # merge kwargs with env
    proxies = kwargs.get('proxies') or {}
    settings = self.merge_environment_settings(
        request.url, proxies, kwargs.get('stream'), kwargs.get('verify'),
        kwargs.get('cert'))
    kwargs.update(settings)

    # requests does not provide a default timeout, hence we might need
    # to add it
    if kwargs.get('timeout') is None:
        kwargs['timeout'] = 120

    if not self.retry:
        response = super(RetrySession, self).send(request, **kwargs)
        self.check_status(response)
        return response

    current_delay = max(self.start_delay, DEFAULT_RETRY_DELAY)
    current_delay = min(current_delay, MAX_RETRY_DELAY)
    num_retries = 0
    self.logger.debug(
        "Sending request {0} to server.".format(request))
    current_error = None
    while num_retries < MAX_NUM_RETRIES:
        try:
            response = super(RetrySession, self).send(request, **kwargs)
            self.check_status(response)
            return response
        except requests.HTTPError as e:
            if e.response.status_code not in self.retry_errors:
                raise exceptions.HttpException(e)
            current_error = e
            if e.response.status_code == requests.codes.unavailable:
                # is there a delay from the server (Retry-After)?
                try:
                    server_delay = int(
                        e.response.headers.get(SERVICE_RETRY,
                                               current_delay))
                except (TypeError, ValueError):
                    # the header is not a whole number of seconds;
                    # keep the locally computed delay
                    self.logger.debug(
                        'Ignoring unparsable {} header'.format(
                            SERVICE_RETRY))
                else:
                    # time.sleep() rejects negative values, so never
                    # let a bogus server value go below zero
                    current_delay = min(max(server_delay, 0),
                                        MAX_RETRY_DELAY)
        except requests.ConnectionError as ce:
            current_error = ce
            # TODO not sure this appropriate for all the
            # 'Connection reset by peer' errors.
            # A post/put to vospace returns a document. If operation
            # succeeded but the error occurs during the response the
            # code below will send the request again. Since the
            # resource has been created/updated, a new error (bad
            # request maybe) might be issued by the server and that
            # can confuse the caller.
            # This code should probably deal with HTTP errors only
            # as the 503s above.
            self.logger.debug("Caught exception: {0}".format(ce))
            if ce.errno != 104:
                # Only continue trying on a reset by peer error.
                raise exceptions.HttpException(orig_exception=ce)
        self.logger.warning(
            "Resending request in {}s ...".format(current_delay))
        time.sleep(current_delay)
        num_retries += 1
        current_delay = min(current_delay * 2, MAX_RETRY_DELAY)
    raise exceptions.HttpException(current_error)
def put_file(self, archive, src_file, archive_stream=None, mime_type=None,
             mime_encoding=None, md5_check=True, input_name=None):
    """
    Puts a file into the archive storage

    The transfer end points are tried in the order they are returned by
    the transfer negotiation; the method returns after the first
    successful upload.

    :param archive: name of the archive
    :param src_file: location of the source file
    :param archive_stream: specific archive stream
    :param mime_type: file mime type
    :param mime_encoding: file mime encoding
    :param md5_check: if True, calculate the md5sum before sending the file
    Server will fail if it receives a corrupted file.
    :param input_name: name to use in the archive overriding the actual
    file name.
    :raises AttributeError: if no archive is specified
    :raises exceptions.UnauthorizedException: if the subject is anonymous
    :raises exceptions.HttpException: if there is no URL to put the data
    to or none of the available URLs accepted it
    """
    if not archive:
        raise AttributeError('No archive specified')

    # We actually raise an exception here since the web
    # service will normally respond with a 200 for an
    # anonymous put, though not provide any endpoints.
    if self._data_client.subject.anon:
        raise exceptions.UnauthorizedException(
            'Must be authenticated to put data')

    self.logger.debug('PUT {}/{}'.format(archive, src_file))

    headers = {}
    if md5_check:
        # calculate the md5sum
        md5sum = self._get_md5sum(src_file)
        headers['Content-MD5'] = md5sum
        logger.debug('Set Content-MD5: {}'.format(md5sum))

    if archive_stream is not None:
        headers[ARCHIVE_STREAM_HTTP_HEADER] = str(archive_stream)

    # MIME type: caller supplied, otherwise detected with magic unless
    # MAGIC_WARN is set, in which case the warning is logged instead
    if mime_type is not None:
        mtype = mime_type
    elif MAGIC_WARN:
        mtype = None
        logger.warning(MAGIC_WARN)
    else:
        m = magic.Magic(mime=True)
        mtype = m.from_file(os.path.realpath(src_file))
    if mtype is not None:
        headers['Content-Type'] = mtype
        logger.debug('Set MIME type: {}'.format(mtype))

    # MIME encoding: same strategy as for the MIME type
    if mime_encoding:
        mencoding = mime_encoding
    elif MAGIC_WARN:
        mencoding = None
        if mtype:
            # only reached with a caller supplied type, i.e. when the
            # warning has not been logged by the type detection above
            logger.warning(MAGIC_WARN)
    else:
        m = magic.Magic(mime_encoding=True)
        mencoding = m.from_file(os.path.realpath(src_file))
    if mencoding:
        headers['Content-Encoding'] = mencoding
        logger.debug('Set MIME encoding: {}'.format(mencoding))

    fname = input_name
    if not fname:
        fname = os.path.basename(src_file)

    protocols = self._get_transfer_protocols(archive, fname, is_get=False,
                                             headers=headers)
    if not protocols:
        raise exceptions.HttpException('No URLs available to put data to')

    # get the list of transfer points
    for protocol in protocols:
        url = protocol.endpoint
        if url is None:
            self.logger.debug('No endpoint for URI, skipping.')
            continue
        self.logger.debug('PUT to URL {}'.format(url))

        try:
            start = time.time()
            with open(src_file, 'rb') as f:
                self._data_client.put(url, headers=headers, data=f)
            duration = time.time() - start
            # float divisors keep the size in MB fractional even when the
            # module does not use true division
            size_mb = os.stat(src_file).st_size / 1024.0 / 1024.0
            # a small upload can complete within the clock resolution;
            # the informational speed figure must not turn a successful
            # upload into a ZeroDivisionError
            if duration > 0:
                speed = round(size_mb / duration, 2)
            else:
                speed = float('inf')
            self.logger.info(
                ('Successfully uploaded archive/file {}/{} in {}s '
                 '(avg. speed: {}MB/s)').format(
                    archive, src_file, round(duration, 2), speed))
            return
        except (exceptions.HttpException, socket.timeout) as e:
            # try a different URL
            self.logger.info(
                'WARN: Cannot put data to {}. Exception: {}'.format(
                    url, e))
            self.logger.warning('Try the next URL')
            continue
    raise exceptions.HttpException(
        'Unable to put data from any of the available URLs')
def get_file(self, archive, file_name, destination=None, decompress=False,
             cutout=None, fhead=False, wcs=False, process_bytes=None,
             md5_check=True):
    """
    Get a file from an archive. The entire file is delivered unless the
    cutout argument is present specifying a cutout to extract from file.

    The transfer end points are tried in the order they are returned by
    the transfer negotiation; the method returns after the first
    successful download.

    :param archive: name of the archive containing the file
    :param file_name: the name of the file to retrieve
    :param destination: file to save data to (file, file_name, stream or
    anything that supports open/close and write). If None, the file is
    saved locally with the name provided by the content disposition
    received from the service.
    :param decompress: True to decompress the file (if applicable),
    False otherwise
    :param cutout: the arguments of cutout operation to be performed by
    the service
    :param fhead: download just the head of a fits file
    :param wcs: True if the wcs is to be included with the file
    :param process_bytes: function to be applied to the received bytes
    :param md5_check: if True, do md5sum check for corrupted data
    :raises exceptions.HttpException: if there is no URL to access the
    data, none of the available URLs delivered it, or the download
    failed with a DownloadError
    """
    assert archive is not None
    assert file_name is not None
    params = {}
    if fhead:
        params['fhead'] = fhead
    if wcs:
        params['wcs'] = wcs
    if cutout:
        params['cutout'] = cutout
    file_info = '{}/{}'.format(archive, file_name)
    self.logger.debug('GET {}'.format(file_info))
    # TODO negotiate transfer even for fhead or wcs?
    protocols = self._get_transfer_protocols(archive, file_name,
                                             params=params)
    if not protocols:
        raise exceptions.HttpException('No URLs available to access data')

    # get the list of transfer points
    for protocol in protocols:
        url = protocol.endpoint
        if url is None:
            self.logger.debug('No endpoint for URI, skipping.')
            continue
        self.logger.debug('GET from URL {}'.format(url))
        try:
            response = self._data_client.get(url, stream=True)
            if destination is not None:
                if not hasattr(destination, 'read'):
                    # got a destination name?
                    with open(destination, 'wb') as f:
                        self._save_bytes(response, f, file_info,
                                         decompress=decompress,
                                         process_bytes=process_bytes,
                                         md5_check=md5_check)
                else:
                    self._save_bytes(response, destination, file_info,
                                     decompress=decompress,
                                     process_bytes=process_bytes,
                                     md5_check=md5_check)
            else:
                # get the destination name from the content disposition
                content_disp = response.headers.get(
                    'content-disposition', '')
                destination = file_name
                for content in content_disp.split():
                    # take what follows 'filename=' wherever it sits in
                    # the token, without the optional quotes or a
                    # trailing parameter separator
                    _, found, value = content.partition('filename=')
                    value = value.strip('"\';')
                    if found and value:
                        destination = value
                self.logger.debug(
                    'Content disposition destination name: {}'.
                    format(destination))
                if destination.endswith('gz') and decompress:
                    destination = os.path.splitext(destination)[0]
                # remove any path information and save the file in local
                # directory
                destination = os.path.basename(destination)
                self.logger.info(
                    'Saved file in local directory under: {}'.format(
                        destination))
                with open(destination, 'wb') as f:
                    self._save_bytes(response, f, file_info,
                                     decompress=decompress,
                                     process_bytes=process_bytes,
                                     md5_check=md5_check)
            return
        except (exceptions.HttpException, socket.timeout) as e:
            # try a different URL
            self.logger.info(
                'WARN: Cannot retrieve data from {}. Exception: {}'.format(
                    url, e))
            self.logger.warning('Try the next URL')
            continue
        except DownloadError as e:
            if not hasattr(destination, 'read'):
                # try to cleanup the corrupted file
                try:
                    os.unlink(destination)
                except OSError:
                    # nothing we can do
                    pass
            raise exceptions.HttpException(str(e))
    raise exceptions.HttpException(
        'Unable to download data from any of the available URLs')
def test_get_reg(self, get_mock, file_mock, file_modtime_mock):
    """
    Exercise the registry handling of WsCapabilities in three situations:
    registry fetched from the server, registry read from an up to date
    cache file, and outdated cache used because the server cannot be
    reached.
    """
    # layout shared by the server content and the cache file content
    registry_template = ('#test content\n {} = {} \n'
                         'ivo://some.provider/service = '
                         'http://providerurl.test/service')
    service_name = 'myservice'
    service_id = 'ivo://canfar.phys.uvic.ca/{}'.format(service_name)

    # 1. cache is outdated: registry information comes from the server
    server_cap_url = 'http://www.canfar.net/myservice'
    server_content = registry_template.format(service_id, server_cap_url)
    get_mock.return_value = Mock(text=server_content)
    # a modified time of 0 for the cache file makes sure the info
    # is retrieved from server
    file_modtime_mock.return_value = 0
    # test anonymous access
    file_mock.write = Mock()
    ws_client = Mock(resource_id=service_id)
    capabilities = ws.WsCapabilities(ws_client)
    registry_path = os.path.join(ws.CACHE_LOCATION, ws.REGISTRY_FILE)
    self.assertEqual(registry_path, capabilities.reg_file)
    expected_caps_file = os.path.join(
        ws.CACHE_LOCATION, 'canfar.phys.uvic.ca',
        '.{}'.format(service_name))
    self.assertEqual(expected_caps_file, capabilities.caps_file)
    self.assertEqual(server_cap_url, capabilities._get_capability_url())
    file_mock.assert_called_once_with(registry_path, 'w')
    # TODO not sure why need to access write this way
    written = file_mock().__enter__.return_value.write
    written.assert_called_once_with(server_content)

    # 2. cache is current: registry information comes from the cache file
    get_mock.reset_mock()
    get_mock.return_value = None
    file_modtime_mock.reset_mock()
    file_mock.reset_mock()
    cached_cap_url = 'http://www.canfar.net/myservice2'
    cached_content = registry_template.format(service_id, cached_cap_url)
    file_modtime_mock.return_value = time.time()
    file_mock().__enter__.return_value.read.return_value = cached_content
    capabilities = ws.WsCapabilities(ws_client)
    self.assertEqual(cached_cap_url, capabilities._get_capability_url())

    # 3. cache is outdated but there are errors retrieving the
    # information from the CADC registry, so in the end the cached
    # version is used
    file_modtime_mock.reset_mock()
    file_mock.reset_mock()
    file_modtime_mock.return_value = 0
    file_mock().__enter__.return_value.read.return_value = cached_content
    get_mock.side_effect = [exceptions.HttpException()]
    ws_client.get.side_effect = [exceptions.HttpException]
    capabilities = ws.WsCapabilities(ws_client)
    self.assertEqual(cached_cap_url, capabilities._get_capability_url())