def test_visit_retry_on_412(self):
    """Visit must re-read and re-post an observation after an HTTP 412.

    The first post raises an UnexpectedException wrapping a response with
    status 412 (the observation changed on the server while it was being
    visited); the second post succeeds.
    """
    core.BATCH_SIZE = 3  # size of the batch is 3
    batches = [['a'], []]
    client = CAOM2RepoClient(auth.Subject(), logging.DEBUG)

    observation = SimpleObservation('cfht', 'a')
    observation.acc_meta_checksum = ChecksumURI('md5:abc')
    client.get_observation = MagicMock(
        side_effect=[observation, observation])

    # exception that looks like a "precondition failed" answer
    precondition_failed = exceptions.UnexpectedException()
    precondition_failed.orig_exception = Mock()
    precondition_failed.orig_exception.response = Mock(status_code=412)
    client.post_observation = MagicMock(
        side_effect=[precondition_failed, None])
    client._get_observations = MagicMock(side_effect=batches)

    plugin = os.path.join(THIS_DIR, 'passplugin.py')
    visited, updated, skipped, failed = client.visit(plugin, 'cfht')

    expectations = ((1, visited), (1, updated), (0, skipped), (0, failed))
    for expected_count, bucket in expectations:
        self.assertEqual(expected_count, len(bucket))

    # get and post called twice to recover from error HTTP status 412 -
    # precondition
    self.assertEqual(2, client.get_observation.call_count)
    self.assertEqual(2, client.post_observation.call_count)
    client.post_observation.assert_called_with(
        observation, observation.acc_meta_checksum.uri)
def check_status(self, response):
    """
    Check the response status and translate HTTP errors.

    Application-related error statuses are mapped to the matching
    exception type; a retryable status is re-raised untouched when
    retries are enabled; anything else becomes an UnexpectedException.

    :param response: response object exposing raise_for_status()
    :return: None when the status does not indicate an error
    """
    try:
        response.raise_for_status()
    except requests.HTTPError as e:
        status = e.response.status_code
        # application statuses and the exception each one maps to
        known_errors = {
            requests.codes.not_found: exceptions.NotFoundException,
            requests.codes.unauthorized: exceptions.UnauthorizedException,
            requests.codes.forbidden: exceptions.ForbiddenException,
            requests.codes.bad_request: exceptions.BadRequestException,
            requests.codes.conflict: exceptions.AlreadyExistsException,
            requests.codes.internal_server_error:
                exceptions.InternalServerException,
            requests.codes.request_entity_too_large:
                exceptions.ByteLimitException,
        }
        error_type = known_errors.get(status)
        if error_type is not None:
            raise error_type(orig_exception=e)
        if self.retry and status in self.retry_errors:
            # hand the original error back unchanged so it can be retried
            raise e
        raise exceptions.UnexpectedException(orig_exception=e)
def get_head(self, uri):
    """
    Retrieve FITS file header data.

    :param uri: str that is an Artifact URI, representing the file for
        which to retrieve headers
    :return: list of fits.Header instances
    :raises exceptions.UnexpectedException: when anything fails while
        retrieving or parsing the header; the original error is attached
        as the cause.
    """
    self._logger.debug(f'Begin get_head for {uri}')
    start = StorageClientWrapper._current()
    try:
        # the context manager closes the buffer on the failure path too
        with BytesIO() as b:
            b.name = uri
            if self._use_si:
                self._cadc_client.cadcget(uri, b, fhead=True)
            else:
                archive, f_name = StorageClientWrapper._decompose(uri)
                self._cadc_client.get_file(archive, f_name, b, fhead=True)
            fits_header = b.getvalue().decode('ascii')
        self._add_metric('get_head', uri, start, len(fits_header))
        temp = make_headers_from_string(fits_header)
        self._logger.debug('End get_head')
        return temp
    except Exception as e:
        # NOTE(review): failures are recorded as 'get_header' while
        # successes use 'get_head' - confirm whether metric consumers
        # rely on the different names before aligning them.
        self._add_fail_metric('get_header', uri)
        self._logger.debug(traceback.format_exc())
        self._logger.error(e)
        raise exceptions.UnexpectedException(
            f'Did not retrieve {uri} header because {e}'
        ) from e
def get(self, working_directory, uri):
    """
    Retrieve data.

    :param working_directory: str where the file will be retrieved to.
        Assumes the same machine as this function is being called from.
    :param uri: str this is an Artifact URI, representing the file to
        be retrieved.
    :raises exceptions.UnexpectedException: when the retrieval fails; the
        original error is attached as the cause.
    """
    self._logger.debug(f'Begin get for {uri} in {working_directory}')
    start = StorageClientWrapper._current()
    try:
        archive, f_name = self._decompose(uri)
        fqn = path.join(working_directory, f_name)
        if self._use_si:
            self._cadc_client.cadcget(uri, dest=fqn)
        else:
            self._cadc_client.get_file(archive, f_name, destination=fqn)
    except Exception as e:
        self._add_fail_metric('get', uri)
        self._logger.debug(traceback.format_exc())
        raise exceptions.UnexpectedException(
            f'Did not retrieve {uri} because {e}'
        ) from e
    # the metric value is the size on disk of the retrieved file
    self._add_metric('get', uri, start, stat(fqn).st_size)
    self._logger.debug('End get')
def test_client_put_failure(mock_metrics):
    """A failing cadcput must surface as CadcException and be recorded.

    The storage client mock raises UnexpectedException from cadcput; the
    test expects si_client_put to raise mc.CadcException, the client to
    have been called with the expected arguments, and a failure to have
    been observed on the metrics mock.
    """
    test_fqn = os.path.join(tc.TEST_FILES_DIR, 'TEST.fits')
    # make sure there is a local file to (fail to) store
    if not os.path.exists(test_fqn):
        with open(test_fqn, 'w') as f:
            f.write('test content')

    mock_client = Mock()
    mock_client.cadcput.side_effect = (
        exceptions.UnexpectedException('error state'))
    test_destination = 'cadc:GEMINI/TEST.fits'
    with pytest.raises(mc.CadcException):
        clc.si_client_put(
            mock_client,
            test_fqn,
            test_destination,
            metrics=mock_metrics,
        )

    # assert_called_with raises on mismatch by itself, so no message
    # tuple is attached to it
    mock_client.cadcput.assert_called_with(
        'cadc:GEMINI/TEST.fits',
        src='/test_files/TEST.fits',
        replace=True,
        file_type='application/fits',
        file_encoding='',
        md5_checksum='9473fdd0d880a43c21b7778d34872157',
    )
    assert mock_metrics.observe_failure.called, 'mock not called'
def test_preview_augment(http_mock):
    """Preview augmentation adds a preview and a thumbnail artifact.

    The CADC client ``get`` is mocked to fail and the HTTP mock is driven
    by ``_get_mock``; after the visit the plane must hold three artifacts
    (one initial plus two new ones) and the thumbnail must have been put
    through the CADC client.
    """
    # this should result in two new artifacts being added to the plane
    # one for a thumbnail and one for a preview
    obs = mc.read_obs_from_file(TEST_OBS_FILE)
    obs.planes[TEST_PRODUCT_ID].data_release = datetime.utcnow()
    assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, 'initial condition'

    test_rejected = mc.Rejected(REJECTED_FILE)
    test_config = mc.Config()
    test_metrics = mc.Metrics(test_config)
    test_observable = mc.Observable(test_rejected, test_metrics)
    cadc_client_mock = Mock()
    clients_mock = Mock()
    clients_mock.data_client = cadc_client_mock
    test_storage_name = gem_name.GemName(file_name=f'{TEST_PRODUCT_ID}.fits')
    kwargs = {
        'working_directory': '/test_files',
        'clients': clients_mock,
        'observable': test_observable,
        'storage_name': test_storage_name,
    }

    # start from a clean slate: no left-over preview from an earlier run
    test_prev = f'/test_files/{TEST_PRODUCT_ID}.jpg'
    if os.path.exists(test_prev):
        os.unlink(test_prev)

    try:
        cadc_client_mock.get.side_effect = exceptions.UnexpectedException(
            'test')
        http_mock.side_effect = _get_mock
        obs = preview_augmentation.visit(obs, **kwargs)
        test_url = (f'{preview_augmentation.PREVIEW_URL}'
                    f'{TEST_PRODUCT_ID}.fits')
        assert http_mock.called, 'http mock should be called'
        # assert_called_with raises on mismatch by itself
        http_mock.assert_called_with(test_url, test_prev)
        assert cadc_client_mock.put.called, 'put mock not called'
        cadc_client_mock.put.assert_called_with(
            '/test_files',
            'cadc:GEMINI/GN2001BQ013-04_th.jpg',
        )
        assert obs is not None, 'expect a result'
        assert (len(
            obs.planes[TEST_PRODUCT_ID].artifacts) == 3), 'two new artifacts'
        prev_uri = mc.build_uri(COLLECTION, f'{TEST_PRODUCT_ID}.jpg', SCHEME)
        thumb_uri = mc.build_uri(COLLECTION, f'{TEST_PRODUCT_ID}_th.jpg',
                                 'cadc')
        assert prev_uri in obs.planes[TEST_PRODUCT_ID].artifacts, 'no preview'
        assert (thumb_uri in
                obs.planes[TEST_PRODUCT_ID].artifacts), 'no thumbnail'
    finally:
        # never leave the generated preview behind
        if os.path.exists(test_prev):
            os.unlink(test_prev)
def put(self, working_directory, uri, stream='default'):
    """
    Store a file at CADC.

    :param working_directory: str fully-qualified name of where to find
        the file on the local machine
    :param uri: str that is an Artifact URI, representing the file to
        be stored at CADC.
    :param stream: str representing the namespace used by the
        CadcDataClient. Not required if using the StorageInventoryClient.
        'default' is default name for a lately-created ad archive.
    :raises exceptions.UnexpectedException: when the file cannot be
        stored; the original error is attached as the cause.
    """
    self._logger.debug(f'Begin put for {uri} in {working_directory}')
    start = self._current()
    cwd = getcwd()
    archive, f_name = StorageClientWrapper._decompose(uri)
    fqn = path.join(working_directory, f_name)
    # the process runs from working_directory for the duration of the
    # transfer; the previous directory is restored in the finally clause
    chdir(working_directory)
    try:
        local_meta = get_local_file_info(fqn)
        if self._use_si:
            # only ask for a replacement when info() finds the file
            replace = self.info(uri) is not None
            self._cadc_client.cadcput(
                uri,
                src=fqn,
                replace=replace,
                file_type=local_meta.file_type,
                file_encoding='',
                md5_checksum=local_meta.md5sum,
            )
        else:
            # archive and f_name were already decomposed from uri above
            self._cadc_client.put_file(
                archive,
                f_name,
                archive_stream=stream,
                mime_type=local_meta.file_type,
                mime_encoding='',
                md5_check=True,
            )
        self._logger.info(f'Stored {fqn} at CADC.')
    except Exception as e:
        self._add_fail_metric('put', uri)
        self._logger.debug(traceback.format_exc())
        self._logger.error(e)
        raise exceptions.UnexpectedException(
            f'Failed to store data with {e}') from e
    finally:
        chdir(cwd)
    # only reached on success, so local_meta is always bound here
    self._add_metric('put', uri, start, local_meta.size)
    self._logger.debug('End put')
def test_preview_augment_unknown_no_preview():
    """Visit when it is not yet known that no preview exists.

    The rejected file is removed first, the HTTP retrieval is patched to
    raise a 'Not Found' CadcException, and the CADC client ``get`` is
    mocked to fail. The visit must still return an observation, attempt
    the HTTP retrieval, and not call the put / metadata / exec helpers.
    """
    # what happens when it's not known that there's no preview
    obs = mc.read_obs_from_file(TEST_OBS_FILE)
    obs.planes[TEST_PRODUCT_ID].data_release = datetime.utcnow()
    assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, 'initial condition'

    # make sure the rejected file is empty
    if os.path.exists(REJECTED_FILE):
        os.unlink(REJECTED_FILE)
    test_rejected = mc.Rejected(REJECTED_FILE)
    test_config = mc.Config()
    test_observable = mc.Observable(test_rejected, mc.Metrics(test_config))
    test_storage_name = gem_name.GemName(file_name=f'{TEST_PRODUCT_ID}.fits')
    cadc_client_mock = Mock()
    clients_mock = Mock()
    clients_mock.data_client = cadc_client_mock
    kwargs = {
        'working_directory': TEST_DATA_DIR,
        'clients': clients_mock,
        'stream': 'stream',
        'observable': test_observable,
        'storage_name': test_storage_name,
    }

    with patch(
        'caom2pipe.manage_composable.http_get',
        side_effect=mc.CadcException(
            'Not Found for url: https://archive.gemini.edu/preview'),
    ) as http_mock, patch(
        'caom2pipe.manage_composable.data_put'
    ) as ad_put_mock, patch(
        'caom2pipe.manage_composable.get_artifact_metadata'
    ) as art_mock, patch(
        'caom2pipe.manage_composable.exec_cmd'
    ) as exec_mock:
        cadc_client_mock.get.side_effect = exceptions.UnexpectedException(
            'test')
        obs = preview_augmentation.visit(obs, **kwargs)
        assert obs is not None, 'expect result'
        test_url = f'{preview_augmentation.PREVIEW_URL}{TEST_PRODUCT_ID}.fits'
        test_prev = f'{TEST_DATA_DIR}/{TEST_PRODUCT_ID}.jpg'
        # assert_called_with raises on mismatch by itself
        http_mock.assert_called_with(test_url, test_prev)
        assert not ad_put_mock.called, 'ad put mock should not be called'
        assert not art_mock.called, 'art mock should not be called'
        assert not exec_mock.called, 'exec mock should not be called'
def remove(self, uri):
    """
    Delete a file from CADC storage.

    :param uri: str that is an Artifact URI, representing the file to be
        removed from CADC.
    :raises NotImplementedError: when the wrapper is not using the
        storage inventory client.
    :raises exceptions.UnexpectedException: when the removal fails; the
        original error is attached as the cause.
    """
    self._logger.debug(f'Begin remove for {uri}')
    start = StorageClientWrapper._current()
    if not self._use_si:
        raise NotImplementedError(
            'No remove functionality for CadcDataClient')
    try:
        self._cadc_client.cadcremove(uri)
    except Exception as e:
        self._add_fail_metric('remove', uri)
        self._logger.debug(traceback.format_exc())
        self._logger.error(e)
        raise exceptions.UnexpectedException(
            f'Did not remove {uri} because {e}') from e
    # there is no size to report for a removal
    self._add_metric('remove', uri, start, value=None)
    self._logger.debug('End remove')
def test_preview_augment_failure(http_mock):
    """Preview augmentation when the preview retrieval answers 'Not Found'.

    The HTTP mock raises a CadcException carrying a 404 message and the
    CADC client ``get`` is mocked to fail. No artifacts may be added and
    nothing may be put; a second visit must not trigger another HTTP
    retrieval.
    """
    # mimic 'Not Found' behaviour
    # this should result in no new artifacts being added to the plane
    # but a record for 'no preview exists at Gemini' added to the
    # record

    def _failure_mock(ignore_url, ignore_local_fqn):
        raise mc.CadcException(
            'Could not retrieve /usr/src/app/N20211007A0003/'
            'N20211007A0003b.jpg from '
            'https://archive.gemini.edu/preview/N20211007A0003b.fits. Failed '
            'with 404 Client Error: Not Found for url: '
            'https://archive.gemini.edu/preview/N20211007A0003b.fits')

    obs = mc.read_obs_from_file(TEST_OBS_FILE)
    obs.planes[TEST_PRODUCT_ID].data_release = datetime.utcnow()
    assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, 'initial condition'

    test_rejected = mc.Rejected(REJECTED_FILE)
    test_config = mc.Config()
    test_metrics = mc.Metrics(test_config)
    test_observable = mc.Observable(test_rejected, test_metrics)
    cadc_client_mock = Mock()
    clients_mock = Mock()
    clients_mock.data_client = cadc_client_mock
    test_storage_name = gem_name.GemName(file_name=f'{TEST_PRODUCT_ID}.fits')
    kwargs = {
        'working_directory': '/test_files',
        'clients': clients_mock,
        'observable': test_observable,
        'storage_name': test_storage_name,
    }

    # start from a clean slate: no left-over preview from an earlier run
    test_prev = f'/test_files/{TEST_PRODUCT_ID}.jpg'
    if os.path.exists(test_prev):
        os.unlink(test_prev)

    try:
        cadc_client_mock.get.side_effect = exceptions.UnexpectedException(
            'test')
        http_mock.side_effect = _failure_mock
        obs = preview_augmentation.visit(obs, **kwargs)
        test_url = (f'{preview_augmentation.PREVIEW_URL}'
                    f'{TEST_PRODUCT_ID}.fits')
        assert http_mock.called, 'http mock should be called'
        # assert_called_with raises on mismatch by itself
        http_mock.assert_called_with(test_url, test_prev)
        assert not cadc_client_mock.put.called, 'put mock should not be called'
        assert obs is not None, 'expect a result'
        assert (len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1
                ), 'same as the pre-condition'
        prev_uri = mc.build_uri(COLLECTION, f'{TEST_PRODUCT_ID}.jpg', SCHEME)
        thumb_uri = mc.build_uri(COLLECTION, f'{TEST_PRODUCT_ID}_th.jpg',
                                 'cadc')
        assert (prev_uri not in obs.planes[TEST_PRODUCT_ID].artifacts
                ), 'should be no preview'
        assert (thumb_uri not in obs.planes[TEST_PRODUCT_ID].artifacts
                ), 'should be no thumbnail'
        # NOTE(review): the condition asserts the preview is NOT flagged
        # while the message says it should be tracked - confirm which of
        # the two reflects the intended behaviour.
        assert not (
            test_rejected.is_no_preview(prev_uri)), 'preview should be tracked'
        assert http_mock.call_count == 1, 'wrong number of calls'

        # now try again to generate the preview, and ensure that the
        # rejected tracking is working
        obs = preview_augmentation.visit(obs, **kwargs)
        assert obs is not None, 'expect a result the second time'
        assert http_mock.call_count == 1, 'never even tried to retrieve it'
    finally:
        # never leave a generated preview behind
        if os.path.exists(test_prev):
            os.unlink(test_prev)
def test_storage_inventory_client(cadc_client_mock):
    """Exercise StorageClientWrapper with the storage inventory client.

    Covers the happy path of info / get_head / get / put / remove against
    a mocked client, then the failure path where each client call raises
    UnexpectedException, and finally that info() returns None when the
    client raises NotFoundException.
    """
    # NOTE(review): Mock(autospec=True) only sets an ``autospec``
    # attribute on the mock; it does not build a spec'd mock - confirm
    # whether create_autospec was intended.
    test_subject = Mock(autospec=True)
    test_uri = 'cadc:TEST/test_file.fits'
    test_working_directory = Path(test_fits2caom2.TESTDATA_DIR)
    test_fqn = test_working_directory / 'test_file.fits'
    if test_fqn.exists():
        test_fqn.unlink()

    def info_si_mock(ignore):
        return FileInfo(id=test_uri, file_type='application/fits',
                        md5sum='abc', size=42)

    def get_si_mock(ignore2, dest, **kwargs):
        # header requests write into the supplied buffer, full
        # retrievals create the file on disk
        fhead = kwargs.get('fhead')
        if fhead:
            dest.write(TEST_HEADERS)
        else:
            test_fqn.write_text('StorageInventoryClient')

    si_client = cadc_client_mock.return_value
    si_client.cadcinfo.side_effect = info_si_mock
    si_client.cadcget.side_effect = get_si_mock
    si_client.cadcput = Mock(autospec=True)
    si_client.cadcremove = Mock(autospec=True)

    test_wrapper = data_util.StorageClientWrapper(
        subject=test_subject,
        using_storage_inventory=True,
    )
    assert test_wrapper is not None, 'ctor failure'

    # info
    test_result = test_wrapper.info(test_uri)
    _check_info_result(test_result)

    # get_head
    test_result = test_wrapper.get_head(test_uri)
    _check_header_result(test_result)

    # get
    test_wrapper.get(test_working_directory, test_uri)
    _check_get_result(test_fqn)

    # put
    test_wrapper.put(test_working_directory, test_uri)
    _check_put_result(si_client.cadcput)

    # delete
    test_wrapper.remove(test_uri)
    assert si_client.cadcremove.called, 'remove call'
    # assert_called_with raises on mismatch by itself
    si_client.cadcremove.assert_called_with(test_uri)

    # failure path: every client call now raises
    si_client.cadcinfo.side_effect = (
        exceptions.UnexpectedException('cadcinfo')
    )
    si_client.cadcget.side_effect = (
        exceptions.UnexpectedException('cadcget')
    )
    si_client.cadcput.side_effect = (
        exceptions.UnexpectedException('cadcput')
    )
    _fail_mock(test_wrapper, test_uri, test_working_directory)

    si_client.cadcremove.side_effect = (
        exceptions.UnexpectedException('cadcremove')
    )
    with pytest.raises(exceptions.UnexpectedException):
        test_wrapper.remove(test_uri)

    # a missing file is reported as None, not as an error
    si_client.cadcinfo.side_effect = (
        exceptions.NotFoundException('cadcinfo')
    )
    test_result = test_wrapper.info(test_uri)
    assert test_result is None, 'expected when not found'
def test_cadc_data_client(cadc_client_mock):
    """Exercise StorageClientWrapper on top of the CadcDataClient.

    Happy path for info / get_head / get / put, NotImplementedError for
    remove, the failure path where every client call raises
    UnexpectedException, and info() returning None on NotFoundException.
    """
    subject = Mock(autospec=True)
    uri = 'ad:TEST/test_file.fits'
    working_dir = Path(test_fits2caom2.TESTDATA_DIR)
    local_file = working_dir / 'test_file.fits'
    if local_file.exists():
        local_file.unlink()

    def info_mock(ignore1, ignore2):
        return {
            'type': 'application/fits',
            'md5sum': 'abc',
            'size': 42,
        }

    def get_mock(ignore1, ignore2, destination, **kwargs):
        # header requests fill the buffer, anything else creates the file
        if kwargs.get('fhead'):
            destination.write(TEST_HEADERS)
        else:
            local_file.write_text('CadcDataClient')

    data_client = cadc_client_mock.return_value
    data_client.get_file_info.side_effect = info_mock
    data_client.get_file.side_effect = get_mock
    data_client.put_file = Mock(autospec=True)

    wrapper = data_util.StorageClientWrapper(
        subject=subject,
        using_storage_inventory=False,
    )
    assert wrapper is not None, 'ctor failure'

    # info
    _check_info_result(wrapper.info(uri))

    # get_head
    _check_header_result(wrapper.get_head(uri))

    # get
    wrapper.get(working_dir, uri)
    _check_get_result(local_file)

    # put
    wrapper.put(working_dir, uri)
    _check_put_result(data_client.put_file)

    # delete
    with pytest.raises(NotImplementedError):
        wrapper.remove(uri)

    # failure path: every client call now raises
    data_client.get_file_info.side_effect = (
        exceptions.UnexpectedException('get_file_info')
    )
    data_client.get_file.side_effect = (
        exceptions.UnexpectedException('get_file')
    )
    data_client.put_file.side_effect = (
        exceptions.UnexpectedException('put_file')
    )
    _fail_mock(wrapper, uri, working_dir)

    # a missing file is reported as None, not as an error
    data_client.get_file_info.side_effect = (
        exceptions.NotFoundException('cadcinfo')
    )
    not_found_result = wrapper.info(uri)
    assert not_found_result is None, 'expected when not found'
def _get_mock(ignore, uri):
    """Mock side effect that fails for one specific URI only.

    :param ignore: unused first positional argument
    :param uri: value compared against the single failing URI
    :raises exceptions.UnexpectedException: when uri is the failing URI
    """
    failing_uri = 'gemini:GEMINI/S20191214S0301.jpg'
    if uri != failing_uri:
        return None
    raise exceptions.UnexpectedException('')