def __init__(self):
    """Create the Elasticsearch client and the collaborators used by the resource.

    The client address is built as ``hostname:port`` from the ``elastic``
    section of ``self._config``.
    """
    super(MetadataEntryResource, self).__init__()

    elastic_config = self._config.elastic
    elastic_address = '{}:{}'.format(
        elastic_config.elastic_hostname, elastic_config.elastic_port)

    self._elastic_search = Elasticsearch(elastic_address)
    self._parser = MetadataIndexingTransformer()
    self._dataset_delete = DataSetRemover()
    self._notifier = CFNotifier(self._config)
def setUp(self):
    """Create a DataSetRemover whose Elasticsearch and HTTP calls are mocked.

    The mocks are kept on ``self`` (``_mock_es_delete``, ``_mock_es_get``,
    ``_mock_es_flush``, ``_mock_req_delete``) so tests can configure and
    inspect them.
    """
    super(DataSetDeleteTest, self).setUp()
    self._delete_obj = DataSetRemover()

    elastic = self._delete_obj._elastic_search
    elastic.delete = self._mock_es_delete = MagicMock()
    elastic.get = self._mock_es_get = MagicMock()
    elastic.indices.flush = self._mock_es_flush = MagicMock()
    self._mock_es_get.return_value = self.MOCK_GET

    # requests.delete is replaced on the shared module object, so the real
    # function must be put back after each test; otherwise the mock leaks
    # into every test that runs afterwards in the same process.
    self.addCleanup(setattr, requests, 'delete', requests.delete)
    requests.delete = self._mock_req_delete = MagicMock()
class DataSetDeleteTest(DataCatalogTestCase):
    """Tests of DataSetRemover.delete with Elasticsearch and HTTP calls mocked."""

    DATA_SET_ID = 'test-entry-id'
    AUTH_TOKEN = 'authorization-token'
    DATABASE_ID = 'database_id'
    TARGET_URI = 'hdfs://URI/DATA/{}/000000_1'.format(DATABASE_ID)
    # Document returned by the mocked Elasticsearch ``get`` call.
    MOCK_GET = {'_source': {'targetUri': TARGET_URI}}

    def setUp(self):
        """Create the object under test and replace its external calls with mocks."""
        super(DataSetDeleteTest, self).setUp()
        self._delete_obj = DataSetRemover()

        elastic = self._delete_obj._elastic_search
        elastic.delete = self._mock_es_delete = MagicMock()
        elastic.get = self._mock_es_get = MagicMock()
        elastic.indices.flush = self._mock_es_flush = MagicMock()
        self._mock_es_get.return_value = self.MOCK_GET

        # requests.delete is patched on the shared module object; restore the
        # real function after each test so the mock does not leak elsewhere.
        self.addCleanup(setattr, requests, 'delete', requests.delete)
        requests.delete = self._mock_req_delete = MagicMock()

    @data(NotFoundError, ConnectionError)
    def test_delete_elasticDeleteErroneous(self, error):
        """An error raised by the Elasticsearch delete call reaches the caller."""
        self._mock_es_delete.side_effect = error
        with self.assertRaises(error):
            self._delete_obj.delete(self.DATA_SET_ID, self.AUTH_TOKEN)

    @data((200, True), (500, False), (404, False))
    @unpack
    def test_delete_dataSetExists_dataSetDeleted(self, external_delete_response,
                                                 external_delete_status):
        """The result flags mirror the status code returned by the external services.

        ``external_delete_response`` is the HTTP status code the mocked
        ``requests.delete`` reports; ``external_delete_status`` is the flag
        value expected in the result for that code.
        """
        self._mock_req_delete.return_value.status_code = external_delete_response

        delete_result = self._delete_obj.delete(self.DATA_SET_ID, self.AUTH_TOKEN)

        self.assertEqual(
            delete_result,
            {
                'deleted_from_publisher': external_delete_status,
                'deleted_from_downloader': external_delete_status
            })
        auth_headers = {'Authorization': self.AUTH_TOKEN}
        calls = [
            call(self._config.services_url.downloader_url_pattern.format(self.DATABASE_ID),
                 json=None,
                 headers=auth_headers),
            call(self._config.services_url.dataset_publisher_url,
                 json=self.MOCK_GET["_source"],
                 headers=auth_headers)
        ]
        self._mock_req_delete.assert_has_calls(calls)
class DataSetDeleteTest(DataCatalogTestCase):
    """Exercises DataSetRemover.delete against mocked Elasticsearch and HTTP calls."""

    DATA_SET_ID = 'test-entry-id'
    AUTH_TOKEN = 'authorization-token'
    DATABASE_ID = 'database_id'
    TARGET_URI = 'hdfs://URI/DATA/{}/000000_1'.format(DATABASE_ID)
    # Canned response of the mocked Elasticsearch ``get`` call.
    MOCK_GET = {'_source': {'targetUri': TARGET_URI}}

    def setUp(self):
        """Build the remover under test and swap its external calls for mocks."""
        super(DataSetDeleteTest, self).setUp()
        self._delete_obj = DataSetRemover()

        self._mock_es_delete = MagicMock()
        self._mock_es_get = MagicMock()
        self._mock_es_flush = MagicMock()
        self._delete_obj._elastic_search.delete = self._mock_es_delete
        self._delete_obj._elastic_search.get = self._mock_es_get
        self._delete_obj._elastic_search.indices.flush = self._mock_es_flush
        self._mock_es_get.return_value = self.MOCK_GET

        # The mock below is installed on the requests module itself, which is
        # shared by the whole test run; undo the patch when the test finishes.
        self.addCleanup(setattr, requests, 'delete', requests.delete)
        self._mock_req_delete = MagicMock()
        requests.delete = self._mock_req_delete

    @data(NotFoundError, ConnectionError)
    def test_delete_elasticDeleteErroneous(self, error):
        """Errors from the Elasticsearch delete call are not swallowed."""
        self._mock_es_delete.side_effect = error
        with self.assertRaises(error):
            self._delete_obj.delete(self.DATA_SET_ID, self.AUTH_TOKEN)

    @data((200, True), (500, False), (404, False))
    @unpack
    def test_delete_dataSetExists_dataSetDeleted(
            self, external_delete_response, external_delete_status):
        """Each external status code maps to the expected deletion flags.

        ``external_delete_response`` is the status code reported by the mocked
        ``requests.delete``; ``external_delete_status`` is the flag value the
        result should carry for it.
        """
        self._mock_req_delete.return_value.status_code = external_delete_response

        delete_result = self._delete_obj.delete(self.DATA_SET_ID, self.AUTH_TOKEN)

        expected_result = {
            'deleted_from_publisher': external_delete_status,
            'deleted_from_downloader': external_delete_status,
        }
        self.assertEqual(delete_result, expected_result)

        calls = [
            call(self._config.services_url.downloader_url_pattern.format(self.DATABASE_ID),
                 json=None,
                 headers={'Authorization': self.AUTH_TOKEN}),
            call(self._config.services_url.dataset_publisher_url,
                 json=self.MOCK_GET["_source"],
                 headers={'Authorization': self.AUTH_TOKEN})
        ]
        self._mock_req_delete.assert_has_calls(calls)
class MetadataEntryResource(DataCatalogResource):
    """
    Storage and retrieval of metadata describing a data set.
    """

    INDEX_ERROR_MESSAGE = 'Putting data set in index failed'
    MISSING_FIELDS_ERROR_MESSAGE = INDEX_ERROR_MESSAGE + ': missing fields in metadata entry.'
    MALFORMED_ERROR_MESSAGE = INDEX_ERROR_MESSAGE + ': malformed data in meta data fields.'
    NO_CONNECTION_ERROR_MESSAGE = INDEX_ERROR_MESSAGE + ': failed to connect to ElasticSearch.'

    def __init__(self):
        """Create the Elasticsearch client and the helper objects used by the handlers."""
        super(MetadataEntryResource, self).__init__()
        self._elastic_search = Elasticsearch(
            '{}:{}'.format(self._config.elastic.elastic_hostname,
                           self._config.elastic.elastic_port))
        self._parser = MetadataIndexingTransformer()
        self._dataset_delete = DataSetRemover()
        self._notifier = CFNotifier(self._config)

    # NOTE(review): the docstrings of the HTTP handlers below are kept
    # textually unchanged because the swagger decorator presumably publishes
    # them in the generated API docs -- confirm before rewording them.

    @swagger.operation(
        responseClass=QueryHit.__name__,
        nickname='get_entry',
        parameters=[
            {
                'name': 'entry_id',
                'description': 'ID of the metadata entry describing some data set.',
                'required': True,
                'allowMultiple': False,
                'dataType': 'string',
                'paramType': 'path'
            }
        ],
        responseMessages=[
            {
                'code': 403,
                'message': 'Forbidden access to the resource'
            },
            {
                'code': 404,
                'message': 'No entry with the given ID found.'
            },
            {
                'code': 503,
                'message': 'Problem while connecting to the index.'
            }
        ]
    )
    def get(self, entry_id):
        """
        Gets a metadata entry labeled with the given ID.
        """
        # Non-admins may read an entry only when it belongs to one of their
        # organisations or is marked public.
        if not flask.g.is_admin \
                and self._get_org_uuid(entry_id) not in flask.g.org_uuid_list \
                and not self._get_is_public_status(entry_id):
            self._log.warning('Forbidden access to the resource')
            return None, 403

        try:
            return self._elastic_search.get(
                index=self._config.elastic.elastic_index,
                doc_type=self._config.elastic.elastic_metadata_type,
                id=entry_id)
        except NotFoundError:
            self._log.exception('Data set with the given ID not found.')
            return None, 404
        except ConnectionError:
            self._log.exception('No connection to the index.')
            return None, 503

    @swagger.operation(
        nickname='insert_entry',
        parameters=[
            {
                'name': 'entry_id',
                'description': 'ID that will be given to the metadata entry.',
                'required': True,
                'allowMultiple': False,
                'dataType': 'string',
                'paramType': 'path'
            },
            {
                'name': 'body',
                'description': 'JSON-formatted metadata entry.',
                'required': True,
                'allowMultiple': False,
                'dataType': InputMetadataEntry.__name__,
                'paramType': 'body'
            }
        ],
        responseMessages=[
            {
                'code': 200,
                'message': 'Entry updated.'
            },
            {
                'code': 201,
                'message': 'Entry created.'
            },
            {
                'code': 403,
                'message': 'Forbidden access to required organisation'
            },
            {
                'code': 400,
                'message': MALFORMED_ERROR_MESSAGE
            },
            {
                'code': 503,
                'message': NO_CONNECTION_ERROR_MESSAGE
            }
        ]
    )
    def put(self, entry_id):
        """
        Puts a metadata entry in the search index under the given ID.
        """
        entry = flask.request.get_json(force=True)
        if not flask.g.is_admin and entry["orgUUID"] not in flask.g.org_uuid_list:
            self._log.warning('Forbidden access to the organisation')
            self._notify(entry, 'Forbidden access to the organisation')
            return None, 403

        # Lazy logger arguments: the entry is only rendered if the record is emitted.
        self._log.info("processed entry: %s", entry)
        try:
            self._parser.transform(entry)
        except InvalidEntryError as ex:
            self._log.error(ex.value)
            self._notify(entry, 'Error durning parsing entry')
            abort(400, ex.value)

        try:
            response = self._elastic_search.index(
                index=self._config.elastic.elastic_index,
                doc_type=self._config.elastic.elastic_metadata_type,
                id=entry_id,
                body=entry
            )
            self._notify(entry, 'Dataset added')
            # 201 when the index call reports a newly created document,
            # 200 when an existing one was overwritten.
            if response['created']:
                return None, 201
            else:
                return None, 200
        except RequestError:
            self._log.exception(self.MALFORMED_ERROR_MESSAGE)
            self._notify(entry, self.MALFORMED_ERROR_MESSAGE)
            return None, 400
        except ConnectionError:
            self._log.exception(self.NO_CONNECTION_ERROR_MESSAGE)
            self._notify(entry, self.NO_CONNECTION_ERROR_MESSAGE)
            return None, 503

    @swagger.operation(
        responseClass=DeleteResponse.__name__,
        nickname='delete_entry',
        parameters=[
            {
                'name': 'entry_id',
                'description': 'ID of the metadata entry describing some data set.',
                'required': True,
                'allowMultiple': False,
                'dataType': 'string',
                'paramType': 'path'
            }
        ],
        responseMessages=[
            {
                'code': 200,
                'message': 'Entry has been removed from Elastic Search. '
                           'Status of deletion from external services is in response\'s body'
            },
            {
                'code': 401,
                'message': 'Authorization header not found.'
            },
            {
                'code': 403,
                'message': 'Forbidden access to the resource'
            },
            {
                'code': 404,
                'message': 'No entry with the given ID found.'
            },
            {
                'code': 503,
                'message': 'Problem connecting to ElasticSearch.'
            }
        ]
    )
    def delete(self, entry_id):
        """
        Deletes a metadata entry labeled with the given ID.
        """
        if not flask.g.is_admin and self._get_org_uuid(entry_id) not in flask.g.org_uuid_list:
            self._log.warning('Forbidden access to the resource')
            return None, 403

        token = flask.request.headers.get('Authorization')
        if not token:
            self._log.error('Authorization header not found.')
            return None, 401

        try:
            deletion_status = self._dataset_delete.delete(entry_id, token)
        except NotFoundError:
            self._log.exception('Data set with the given ID not found.')
            return None, 404
        except ConnectionError:
            self._log.exception('No connection to the index.')
            return None, 503

        # The operation is documented as reporting the status of deletion from
        # the external services in the response body, so pass it through
        # instead of discarding it.
        return deletion_status, 200

    @swagger.operation(
        nickname='update_attributes',
        parameters=[
            {
                'name': 'entry_id',
                'description': 'ID of a metadata entry (data set).',
                'required': True,
                'allowMultiple': False,
                'dataType': 'string',
                'paramType': 'path'
            },
            {
                'name': 'body',
                'description': 'Attributes with values to change',
                'required': True,
                'allowMultiple': False,
                'dataType': IndexedMetadataEntry.__name__,
                'paramType': 'body'
            }
        ],
        responseMessages=[
            {
                'code': 200,
                'message': 'Data set attributes are updated.'
            },
            {
                'code': 400,
                'message': 'Wrong input data.'
            },
            {
                'code': 403,
                'message': 'Forbidden access to the resource'
            },
            {
                'code': 404,
                'message': 'No entry with the given ID found.'
            },
            {
                'code': 503,
                'message': 'Problem while connecting to the index.'
            }
        ]
    )
    def post(self, entry_id):
        """
        Updates specified attributes of metadata entry with the given ID.
        The body of the POST method should be formed in a following way:

        {
            "argumentName": ["value01", "value02"]
        }

        The value of a given argument will replace current value for this argument
        in the specified metadata entry.

        Example:
        {
            "orgUUID": ["org-id-01", "public"],
            "title": "A new, better title for this data set!"
        }
        """
        if not flask.g.is_admin and self._get_org_uuid(entry_id) not in flask.g.org_uuid_list:
            # No exception is being handled here, so log a plain warning
            # (as get/delete do) rather than an empty traceback.
            self._log.warning('Forbidden access to the resource')
            return None, 403

        exception_message = "Failed to update the data set's attributes."

        body = flask.request.get_json(force=True)
        # Only attributes known to the indexed entry model may be updated.
        if not set(body).issubset(IndexedMetadataEntry.resource_fields):
            self._log.warning('Request body is invalid. Data: %s', flask.request.data)
            abort(400)

        body_dict = {'doc': body}
        try:
            self._elastic_search.update(
                index=self._config.elastic.elastic_index,
                doc_type=self._config.elastic.elastic_metadata_type,
                id=entry_id,
                body=body_dict)
        except NotFoundError:
            self._log.exception(exception_message)
            abort(404)
        except ConnectionError:
            self._log.exception('No connection to the index.')
            return None, 503
        return None

    def _notify(self, entry, message):
        """
        Send ``message`` to the notifier, prefixed with the entry's ``sourceUri``
        (empty when absent) and addressed to the entry's ``orgUUID``.
        """
        notify_msg = '{} - {}'.format(entry.get('sourceUri', ''), message)
        self._notifier.notify(notify_msg, entry['orgUUID'])

    def _get_org_uuid(self, entry_id):
        """Return the ``orgUUID`` field of the stored entry."""
        return self._get_entry(entry_id)["orgUUID"]

    def _get_is_public_status(self, entry_id):
        """Return the ``isPublic`` field of the stored entry."""
        return self._get_entry(entry_id)["isPublic"]

    def _get_entry(self, entry_id):
        """
        Return the ``_source`` of the entry stored under ``entry_id``.
        Aborts the request with 404 when the index has no such entry.
        """
        try:
            return self._elastic_search.get(
                index=self._config.elastic.elastic_index,
                doc_type=self._config.elastic.elastic_metadata_type,
                id=entry_id)["_source"]
        except NotFoundError:
            self._log.exception("Not found")
            abort(404)
class MetadataEntryResource(DataCatalogResource):
    """
    Storage and retrieval of metadata describing a data set.
    """

    INDEX_ERROR_MESSAGE = 'Putting data set in index failed'
    MISSING_FIELDS_ERROR_MESSAGE = INDEX_ERROR_MESSAGE + ': missing fields in metadata entry.'
    MALFORMED_ERROR_MESSAGE = INDEX_ERROR_MESSAGE + ': malformed data in meta data fields.'
    NO_CONNECTION_ERROR_MESSAGE = INDEX_ERROR_MESSAGE + ': failed to connect to ElasticSearch.'

    def __init__(self):
        """Create the Elasticsearch client and the helper objects used by the handlers."""
        super(MetadataEntryResource, self).__init__()
        self._elastic_search = Elasticsearch(
            '{}:{}'.format(self._config.elastic.elastic_hostname,
                           self._config.elastic.elastic_port))
        self._parser = MetadataIndexingTransformer()
        self._dataset_delete = DataSetRemover()
        self._notifier = CFNotifier(self._config)

    def get(self, entry_id):
        """
        Gets a metadata entry labeled with the given ID.

        Returns the Elasticsearch response on success, or ``(None, status)``
        with 403 (access denied), 404 (no such entry) or 503 (index
        unreachable).
        """
        # Non-admins may read an entry only when it belongs to one of their
        # organisations or is marked public.
        if not flask.g.is_admin \
                and self._get_org_uuid(entry_id) not in flask.g.org_uuid_list \
                and not self._get_is_public_status(entry_id):
            self._log.warning('Forbidden access to the resource')
            return None, 403

        try:
            return self._elastic_search.get(
                index=self._config.elastic.elastic_index,
                doc_type=self._config.elastic.elastic_metadata_type,
                id=entry_id)
        except NotFoundError:
            self._log.exception('Data set with the given ID not found.')
            return None, 404
        except ConnectionError:
            self._log.exception('No connection to the index.')
            return None, 503

    def put(self, entry_id):
        """
        Puts a metadata entry in the search index under the given ID.

        Returns ``(None, status)``: 403 when the caller may not write to the
        entry's organisation, otherwise whatever ``add_data_set`` returns.
        Aborts with 400 when the entry fails transformation.
        """
        entry = flask.request.get_json(force=True)
        if not flask.g.is_admin and entry["orgUUID"] not in flask.g.org_uuid_list:
            self._log.warning('Forbidden access to the organisation')
            self._notify(entry, 'Forbidden access to the organisation')
            return None, 403

        # Lazy logger arguments: the entry is only rendered if the record is emitted.
        self._log.info("processed entry: %s", entry)
        try:
            self._parser.transform(entry)
        except InvalidEntryError as ex:
            self._log.error(ex.value)
            self._notify(entry, 'Error durning parsing entry')
            abort(400, ex.value)
        return self.add_data_set(entry_id, entry)

    def add_data_set(self, entry_id, entry):
        """
        Index ``entry`` under ``entry_id`` and notify about the outcome.

        Returns ``(None, 201)`` when the index reports a newly created
        document, ``(None, 200)`` when an existing one was overwritten,
        ``(None, 400)`` on a request error and ``(None, 503)`` when the index
        is unreachable.
        """
        try:
            response = self._elastic_search.index(
                index=self._config.elastic.elastic_index,
                doc_type=self._config.elastic.elastic_metadata_type,
                id=entry_id,
                body=entry
            )
            self._notify(entry, 'Dataset added')
            if response['created']:
                return None, 201
            else:
                return None, 200
        except RequestError:
            self._log.exception(self.MALFORMED_ERROR_MESSAGE)
            self._notify(entry, self.MALFORMED_ERROR_MESSAGE)
            return None, 400
        except ConnectionError:
            self._log.exception(self.NO_CONNECTION_ERROR_MESSAGE)
            self._notify(entry, self.NO_CONNECTION_ERROR_MESSAGE)
            return None, 503

    def delete(self, entry_id):
        """
        Deletes a metadata entry labeled with the given ID.

        Returns ``(deletion_status, 200)`` on success, where
        ``deletion_status`` is whatever the data set remover reports, or
        ``(None, status)`` with 401 (no Authorization header), 403 (access
        denied), 404 (no such entry) or 503 (index unreachable).
        """
        # Fetched once: used for the access check and for every notification.
        entry = self._get_entry(entry_id)
        if not flask.g.is_admin and entry["orgUUID"] not in flask.g.org_uuid_list:
            self._log.warning('Forbidden access to the resource')
            return None, 403

        token = self._get_token_from_request()
        if not token:
            return None, 401

        try:
            deletion_status = self._dataset_delete.delete(entry_id, token)
            self._notify(entry, "Dataset deleted")
            return deletion_status, 200
        except NotFoundError:
            self._log.exception('Data set with the given ID not found.')
            self._notify(entry, "Data set with the given ID not found.")
            return None, 404
        except ConnectionError:
            self._log.exception('No connection to the index.')
            self._notify(entry, 'No connection to the index.')
            return None, 503

    def post(self, entry_id):
        """
        Updates specified attributes of metadata entry with the given ID.
        The body of the POST method should be formed in a following way:

        {
            "argumentName": ["value01", "value02"]
        }

        The value of a given argument will replace current value for this argument
        in the specified metadata entry.

        Example:
        {
            "title": "A new, better title for this data set!"
        }
        """
        # Fetch the entry once up front. It drives the access check and is
        # reused for failure notifications, which must not query the index
        # again: a second lookup inside an error handler would re-raise the
        # very error being handled.
        try:
            entry = self._get_entry(entry_id)
        except ConnectionError:
            self._log.exception('No connection to the index.')
            return None, 503

        if not flask.g.is_admin and entry["orgUUID"] not in flask.g.org_uuid_list:
            # No exception is being handled here, so log a plain warning.
            self._log.warning('Forbidden access to the resource')
            return None, 403

        exception_message = "Failed to update the data set's attributes."

        body = flask.request.get_json(force=True)
        if not set(body).issubset(CERBERUS_SCHEMA):
            self._log.warning('Request body is invalid. Data: %s', flask.request.data)
            abort(400)

        if 'isPublic' in body:
            token = self._get_token_from_request()
            if not token:
                return None, 401
            try:
                self._dataset_delete.delete_public_from_hive(entry_id, token)
            except NotFoundError:
                self._log.exception('Data set with the given ID not found.')
                return None, 404
            except ConnectionError:
                self._log.exception('No connection to the index.')
                return None, 503

        try:
            self._elastic_search.update(
                index=self._config.elastic.elastic_index,
                doc_type=self._config.elastic.elastic_metadata_type,
                id=entry_id,
                body={'doc': body})
            # Re-read the entry so the notification reflects the stored state.
            updated_entry = self._get_entry(entry_id)
            is_public_status_tag = 'public' if updated_entry["isPublic"] else 'private'
            self._notify(updated_entry, "Dataset changed status on", is_public_status_tag)
        except NotFoundError:
            self._log.exception(exception_message)
            self._notify(entry, exception_message)
            abort(404)
        except ConnectionError:
            self._log.exception('No connection to the index.')
            self._notify(entry, 'No connection to the index.')
            return None, 503
        return None

    def _notify(self, entry, message, status=""):
        """
        Send ``message`` (followed by ``status``) to the notifier, prefixed
        with the entry's ``sourceUri`` (empty when absent) and addressed to
        the entry's ``orgUUID``.
        """
        notify_msg = '{} - {} {}'.format(entry.get('sourceUri', ''), message, status)
        self._notifier.notify(notify_msg, entry['orgUUID'])

    def _get_org_uuid(self, entry_id):
        """Return the ``orgUUID`` field of the stored entry."""
        return self._get_entry(entry_id)["orgUUID"]

    def _get_is_public_status(self, entry_id):
        """Return the ``isPublic`` field of the stored entry."""
        return self._get_entry(entry_id)["isPublic"]

    def _get_entry(self, entry_id):
        """
        Return the ``_source`` of the entry stored under ``entry_id``.
        Aborts the request with 404 when the index has no such entry.
        """
        try:
            return self._elastic_search.get(
                index=self._config.elastic.elastic_index,
                doc_type=self._config.elastic.elastic_metadata_type,
                id=entry_id)["_source"]
        except NotFoundError:
            self._log.exception("Not found")
            abort(404)

    def _get_token_from_request(self):
        """
        Return the value of the request's Authorization header.

        Returns None (after logging an error) when the header is missing or
        empty; the caller is responsible for answering 401 in that case.
        """
        token = flask.request.headers.get('Authorization')
        if not token:
            self._log.error('Authorization header not found.')
            return None
        return token