Пример #1
0
 def __init__(self):
     """Create the Elasticsearch client and the helper objects kept on the resource."""
     super(MetadataEntryResource, self).__init__()

     # The client is addressed as "<hostname>:<port>", both read from the config.
     elastic_settings = self._config.elastic
     index_address = '{}:{}'.format(elastic_settings.elastic_hostname,
                                    elastic_settings.elastic_port)
     self._elastic_search = Elasticsearch(index_address)

     self._parser = MetadataIndexingTransformer()
     self._dataset_delete = DataSetRemover()
     self._notifier = CFNotifier(self._config)
    def setUp(self):
        """Create a DataSetRemover and replace its Elasticsearch and HTTP calls with mocks."""
        super(DataSetDeleteTest, self).setUp()

        self._delete_obj = DataSetRemover()
        es_client = self._delete_obj._elastic_search

        # Each mock is kept on the test case so individual tests can
        # configure it or inspect the calls it received.
        self._mock_es_delete = MagicMock()
        es_client.delete = self._mock_es_delete

        self._mock_es_get = MagicMock()
        es_client.get = self._mock_es_get

        self._mock_es_flush = MagicMock()
        es_client.indices.flush = self._mock_es_flush

        self._mock_es_get.return_value = self.MOCK_GET

        # Replaces the module-level requests.delete function.
        self._mock_req_delete = MagicMock()
        requests.delete = self._mock_req_delete
Пример #3
0
class DataSetDeleteTest(DataCatalogTestCase):
    """Tests of DataSetRemover.delete with Elasticsearch and HTTP calls mocked."""

    DATA_SET_ID = 'test-entry-id'
    AUTH_TOKEN = 'authorization-token'
    DATABASE_ID = 'database_id'
    TARGET_URI = 'hdfs://URI/DATA/{}/000000_1'.format(DATABASE_ID)
    # Canned response of the mocked Elasticsearch `get` call.
    MOCK_GET = {'_source': {'targetUri': TARGET_URI}}

    def setUp(self):
        super(DataSetDeleteTest, self).setUp()

        self._delete_obj = DataSetRemover()
        self._delete_obj._elastic_search.delete = self._mock_es_delete = MagicMock()
        self._delete_obj._elastic_search.get = self._mock_es_get = MagicMock()
        self._delete_obj._elastic_search.indices.flush = self._mock_es_flush = MagicMock()
        self._mock_es_get.return_value = self.MOCK_GET

        # requests.delete is replaced on the module itself. Register a
        # cleanup that puts the current function back, so the mock does not
        # leak into tests that run after this one.
        self.addCleanup(setattr, requests, 'delete', requests.delete)
        requests.delete = self._mock_req_delete = MagicMock()

    # An error raised by the Elasticsearch delete call must reach the caller.
    @data(NotFoundError, ConnectionError)
    def test_delete_elasticDeleteErroneous(self, error):
        self._mock_es_delete.side_effect = error
        with self.assertRaises(error):
            self._delete_obj.delete(self.DATA_SET_ID, self.AUTH_TOKEN)

    # Each tuple is (HTTP status returned by the mocked requests.delete,
    # deletion flag expected in the result for both external services).
    @data((200, True), (500, False), (404, False))
    @unpack
    def test_delete_dataSetExists_dataSetDeleted(self,
                                                 external_delete_response,
                                                 external_delete_status):
        self._mock_req_delete.return_value.status_code = external_delete_response

        delete_result = self._delete_obj.delete(self.DATA_SET_ID,
                                                self.AUTH_TOKEN)

        self.assertEqual(
            delete_result, {
                'deleted_from_publisher': external_delete_status,
                'deleted_from_downloader': external_delete_status
            })
        # Expected HTTP DELETE requests: first the downloader URL formatted
        # with DATABASE_ID, then the publisher URL carrying the entry source.
        calls = [
            call(self._config.services_url.downloader_url_pattern.format(
                self.DATABASE_ID),
                 json=None,
                 headers={'Authorization': self.AUTH_TOKEN}),
            call(self._config.services_url.dataset_publisher_url,
                 json=self.MOCK_GET["_source"],
                 headers={'Authorization': self.AUTH_TOKEN})
        ]
        self._mock_req_delete.assert_has_calls(calls)
Пример #4
0
class DataSetDeleteTest(DataCatalogTestCase):
    """Tests of DataSetRemover.delete with Elasticsearch and HTTP calls mocked."""

    DATA_SET_ID = 'test-entry-id'
    AUTH_TOKEN = 'authorization-token'
    DATABASE_ID = 'database_id'
    TARGET_URI = 'hdfs://URI/DATA/{}/000000_1'.format(DATABASE_ID)
    # Canned response of the mocked Elasticsearch `get` call.
    MOCK_GET = {'_source': {'targetUri': TARGET_URI}}

    def setUp(self):
        super(DataSetDeleteTest, self).setUp()

        self._delete_obj = DataSetRemover()
        self._delete_obj._elastic_search.delete = self._mock_es_delete = MagicMock()
        self._delete_obj._elastic_search.get = self._mock_es_get = MagicMock()
        self._delete_obj._elastic_search.indices.flush = self._mock_es_flush = MagicMock()
        self._mock_es_get.return_value = self.MOCK_GET

        # Remember the function currently installed as requests.delete and
        # restore it once the test is over; without this the mock stays on
        # the requests module for every test that runs afterwards.
        replaced_delete = requests.delete
        self.addCleanup(setattr, requests, 'delete', replaced_delete)
        requests.delete = self._mock_req_delete = MagicMock()

    # An error raised by the Elasticsearch delete call must reach the caller.
    @data(NotFoundError, ConnectionError)
    def test_delete_elasticDeleteErroneous(self, error):
        self._mock_es_delete.side_effect = error
        with self.assertRaises(error):
            self._delete_obj.delete(self.DATA_SET_ID, self.AUTH_TOKEN)

    # Each tuple is (HTTP status returned by the mocked requests.delete,
    # deletion flag expected in the result for both external services).
    @data((200, True),
          (500, False),
          (404, False))
    @unpack
    def test_delete_dataSetExists_dataSetDeleted(
            self,
            external_delete_response,
            external_delete_status):
        self._mock_req_delete.return_value.status_code = external_delete_response

        delete_result = self._delete_obj.delete(self.DATA_SET_ID, self.AUTH_TOKEN)

        self.assertEqual(
            delete_result,
            {'deleted_from_publisher': external_delete_status, 'deleted_from_downloader': external_delete_status})
        # Expected HTTP DELETE requests: first the downloader URL formatted
        # with DATABASE_ID, then the publisher URL carrying the entry source.
        calls = [
            call(self._config.services_url.downloader_url_pattern.format(self.DATABASE_ID),
                 json=None, headers={'Authorization': self.AUTH_TOKEN}),
            call(self._config.services_url.dataset_publisher_url,
                 json=self.MOCK_GET["_source"], headers={'Authorization': self.AUTH_TOKEN})
        ]
        self._mock_req_delete.assert_has_calls(calls)
Пример #5
0
    def setUp(self):
        """Prepare the DataSetRemover under test with mocked collaborators."""
        super(DataSetDeleteTest, self).setUp()

        # Create the mocks first; `get` answers with the canned MOCK_GET document.
        self._mock_es_delete = MagicMock()
        self._mock_es_get = MagicMock(return_value=self.MOCK_GET)
        self._mock_es_flush = MagicMock()
        self._mock_req_delete = MagicMock()

        # Then attach them to the remover's Elasticsearch client ...
        self._delete_obj = DataSetRemover()
        elastic = self._delete_obj._elastic_search
        elastic.delete = self._mock_es_delete
        elastic.get = self._mock_es_get
        elastic.indices.flush = self._mock_es_flush

        # ... and to the requests module.
        requests.delete = self._mock_req_delete
Пример #6
0
class MetadataEntryResource(DataCatalogResource):

    """
    Storage and retrieval of metadata describing a data set.
    """

    # Messages used both in the swagger description of `put` and in its
    # log / notification output.
    INDEX_ERROR_MESSAGE = 'Putting data set in index failed'
    MISSING_FIELDS_ERROR_MESSAGE = INDEX_ERROR_MESSAGE + ': missing fields in metadata entry.'
    MALFORMED_ERROR_MESSAGE = INDEX_ERROR_MESSAGE + ': malformed data in meta data fields.'
    NO_CONNECTION_ERROR_MESSAGE = INDEX_ERROR_MESSAGE + ': failed to connect to ElasticSearch.'

    def __init__(self):
        super(MetadataEntryResource, self).__init__()
        # The client is addressed as "<hostname>:<port>" from the config.
        self._elastic_search = Elasticsearch(
            '{}:{}'.format(self._config.elastic.elastic_hostname,
                           self._config.elastic.elastic_port))
        self._parser = MetadataIndexingTransformer()
        self._dataset_delete = DataSetRemover()
        self._notifier = CFNotifier(self._config)

    @swagger.operation(
        responseClass=QueryHit.__name__,
        nickname='get_entry',
        parameters=[
            {
                'name': 'entry_id',
                'description': 'ID of the metadata entry describing some data set.',
                'required': True,
                'allowMultiple': False,
                'dataType': 'string',
                'paramType': 'path'
            }
        ],
        responseMessages=[
            {
                'code': 403,
                'message': 'Forbidden access to the resource'
            },
            {
                'code': 404,
                'message': 'No entry with the given ID found.'
            },
            {
                'code': 503,
                'message': 'Problem while connecting to the index.'
            }
        ]
    )
    def get(self, entry_id):
        """
        Gets a metadata entry labeled with the given ID.
        """
        # Access is granted to admins, to members of the owning organisation
        # and to anybody when the entry is marked public.
        if not flask.g.is_admin \
                and self._get_org_uuid(entry_id) not in flask.g.org_uuid_list \
                and not self._get_is_public_status(entry_id):
            self._log.warning('Forbidden access to the resource')
            return None, 403

        try:
            return self._elastic_search.get(
                index=self._config.elastic.elastic_index,
                doc_type=self._config.elastic.elastic_metadata_type,
                id=entry_id)
        except NotFoundError:
            self._log.exception('Data set with the given ID not found.')
            return None, 404
        except ConnectionError:
            self._log.exception('No connection to the index.')
            return None, 503

    @swagger.operation(
        nickname='insert_entry',
        parameters=[
            {
                'name': 'entry_id',
                'description': 'ID that will be given to the metadata entry.',
                'required': True,
                'allowMultiple': False,
                'dataType': 'string',
                'paramType': 'path'
            },
            {
                'name': 'body',
                'description': 'JSON-formatted metadata entry.',
                'required': True,
                'allowMultiple': False,
                'dataType': InputMetadataEntry.__name__,
                'paramType': 'body'
            }
        ],
        responseMessages=[
            {
                'code': 200,
                'message': 'Entry updated.'
            },
            {
                'code': 201,
                'message': 'Entry created.'
            },
            {
                'code': 403,
                'message': 'Forbidden access to required organisation'
            },
            {
                'code': 400,
                'message': MALFORMED_ERROR_MESSAGE
            },
            {
                'code': 503,
                'message': NO_CONNECTION_ERROR_MESSAGE
            }
        ]
    )
    def put(self, entry_id):
        """
        Puts a metadata entry in the search index under the given ID.
        """
        entry = flask.request.get_json(force=True)
        if not flask.g.is_admin and entry["orgUUID"] not in flask.g.org_uuid_list:
            self._log.warning('Forbidden access to the organisation')
            self._notify(entry, 'Forbidden access to the organisation')
            return None, 403

        try:
            # Let the logger do the formatting so it is skipped when the
            # INFO level is disabled.
            self._log.info("processed entry: %s", entry)
            self._parser.transform(entry)
        except InvalidEntryError as ex:
            self._log.error(ex.value)
            self._notify(entry, 'Error durning parsing entry')
            abort(400, ex.value)

        try:
            response = self._elastic_search.index(
                index=self._config.elastic.elastic_index,
                doc_type=self._config.elastic.elastic_metadata_type,
                id=entry_id,
                body=entry
            )
            self._notify(entry, 'Dataset added')
            # The 'created' flag of the index response selects between
            # 201 (created) and 200 (updated).
            if response['created']:
                return None, 201
            else:
                return None, 200
        except RequestError:
            self._log.exception(self.MALFORMED_ERROR_MESSAGE)
            self._notify(entry, self.MALFORMED_ERROR_MESSAGE)
            return None, 400
        except ConnectionError:
            self._log.exception(self.NO_CONNECTION_ERROR_MESSAGE)
            self._notify(entry, self.NO_CONNECTION_ERROR_MESSAGE)
            return None, 503

    @swagger.operation(
        responseClass=DeleteResponse.__name__,
        nickname='delete_entry',
        parameters=[
            {
                'name': 'entry_id',
                'description': 'ID of the metadata entry describing some data set.',
                'required': True,
                'allowMultiple': False,
                'dataType': 'string',
                'paramType': 'path'
            }
        ],
        responseMessages=[
            {
                'code': 200,
                'message': 'Entry has been removed from Elastic Search. '
                           'Status of deletion from external services is in response\'s body'
            },
            {
                'code': 401,
                'message': 'Authorization header not found.'
            },
            {
                'code': 403,
                'message': 'Forbidden access to the resource'
            },
            {
                'code': 404,
                'message': 'No entry with the given ID found.'
            },
            {
                'code': 503,
                'message': 'Problem connecting to ElasticSearch.'
            }
        ]
    )
    def delete(self, entry_id):
        """
        Deletes a metadata entry labeled with the given ID.
        """
        if not flask.g.is_admin and self._get_org_uuid(entry_id) not in flask.g.org_uuid_list:
            self._log.warning('Forbidden access to the resource')
            return None, 403
        token = flask.request.headers.get('Authorization')
        if not token:
            self._log.error('Authorization header not found.')
            return None, 401
        try:
            # Hand the remover's result back to the client: the operation is
            # documented as returning the external deletion status in the
            # response body.
            deletion_status = self._dataset_delete.delete(entry_id, token)
            return deletion_status, 200
        except NotFoundError:
            self._log.exception('Data set with the given ID not found.')
            return None, 404
        except ConnectionError:
            self._log.exception('No connection to the index.')
            return None, 503

    @swagger.operation(
        nickname='update_attributes',
        parameters=[
            {
                'name': 'entry_id',
                'description': 'ID of a metadata entry (data set).',
                'required': True,
                'allowMultiple': False,
                'dataType': 'string',
                'paramType': 'path'
            },
            {
                'name': 'body',
                'description': 'Attributes with values to change',
                'required': True,
                'allowMultiple': False,
                'dataType': IndexedMetadataEntry.__name__,
                'paramType': 'body'
            }
        ],
        responseMessages=[
            {
                'code': 200,
                'message': 'Data set attributes are updated.'
            },
            {
                'code': 400,
                'message': 'Wrong input data.'
            },
            {
                'code': 403,
                'message': 'Forbidden access to the resource'
            },
            {
                'code': 404,
                'message': 'No entry with the given ID found.'
            },
            {
                'code': 503,
                'message': 'Problem while connecting to the index.'
            }
        ]
    )
    def post(self, entry_id):
        """
        Updates specified attributes of metadata entry with the given ID.
        The body of the POST method should be formed in a following way:

        {
            "argumentName": ["value01", "value02"]
        }

        The value of a given argument will replace current value for this argument
        in the specified metadata entry.

        Example:
        {
            "orgUUID": ["org-id-01", "public"],
            "title": "A new, better title for this data set!"
        }
        """
        if not flask.g.is_admin and self._get_org_uuid(entry_id) not in flask.g.org_uuid_list:
            # No exception is being handled here, so log a plain warning
            # (as `get` and `delete` do) rather than an empty traceback.
            self._log.warning('Forbidden access to the resource')
            return None, 403
        exception_message = "Failed to update the data set's attributes."

        body = flask.request.get_json(force=True)
        # Only fields known to IndexedMetadataEntry may be updated.
        if not set(body).issubset(IndexedMetadataEntry.resource_fields):
            self._log.warning('Request body is invalid. Data: %s', flask.request.data)
            abort(400)
        # The attributes are sent as a partial document under the 'doc' key.
        body_dict = {'doc': body}

        try:
            self._elastic_search.update(
                index=self._config.elastic.elastic_index,
                doc_type=self._config.elastic.elastic_metadata_type,
                id=entry_id,
                body=body_dict)
        except NotFoundError:
            self._log.exception(exception_message)
            abort(404)
        except ConnectionError:
            self._log.exception('No connection to the index.')
            return None, 503
        return

    def _notify(self, entry, message):
        """
        Sends `message`, prefixed with the entry's 'sourceUri' (empty when
        absent), through the notifier to the entry's 'orgUUID'.
        """
        notify_msg = '{} - {}'.format(entry.get('sourceUri', ''), message)
        self._notifier.notify(notify_msg, entry['orgUUID'])

    def _get_org_uuid(self, entry_id):
        """Returns the 'orgUUID' field of the stored entry."""
        return self._get_entry(entry_id)["orgUUID"]

    def _get_is_public_status(self, entry_id):
        """Returns the 'isPublic' field of the stored entry."""
        return self._get_entry(entry_id)["isPublic"]

    def _get_entry(self, entry_id):
        """
        Returns the '_source' part of the indexed document with the given ID.
        Aborts the request with 404 when the index reports it as not found.
        """
        try:
            return self._elastic_search.get(
                index=self._config.elastic.elastic_index,
                doc_type=self._config.elastic.elastic_metadata_type,
                id=entry_id)["_source"]

        except NotFoundError:
            self._log.exception("Not found")
            abort(404)
class MetadataEntryResource(DataCatalogResource):

    """
    Storage and retrieval of metadata describing a data set.
    """

    # Messages used in the log / notification output of `add_data_set`.
    INDEX_ERROR_MESSAGE = 'Putting data set in index failed'
    MISSING_FIELDS_ERROR_MESSAGE = INDEX_ERROR_MESSAGE + ': missing fields in metadata entry.'
    MALFORMED_ERROR_MESSAGE = INDEX_ERROR_MESSAGE + ': malformed data in meta data fields.'
    NO_CONNECTION_ERROR_MESSAGE = INDEX_ERROR_MESSAGE + ': failed to connect to ElasticSearch.'

    def __init__(self):
        super(MetadataEntryResource, self).__init__()
        # The client is addressed as "<hostname>:<port>" from the config.
        self._elastic_search = Elasticsearch(
            '{}:{}'.format(self._config.elastic.elastic_hostname,
                           self._config.elastic.elastic_port))
        self._parser = MetadataIndexingTransformer()
        self._dataset_delete = DataSetRemover()
        self._notifier = CFNotifier(self._config)

    def get(self, entry_id):
        """
        Gets a metadata entry labeled with the given ID.
        """
        # Access is granted to admins, to members of the owning organisation
        # and to anybody when the entry is marked public.
        if not flask.g.is_admin \
                and self._get_org_uuid(entry_id) not in flask.g.org_uuid_list \
                and not self._get_is_public_status(entry_id):
            self._log.warning('Forbidden access to the resource')
            return None, 403

        try:
            return self._elastic_search.get(
                index=self._config.elastic.elastic_index,
                doc_type=self._config.elastic.elastic_metadata_type,
                id=entry_id)
        except NotFoundError:
            self._log.exception('Data set with the given ID not found.')
            return None, 404
        except ConnectionError:
            self._log.exception('No connection to the index.')
            return None, 503

    def put(self, entry_id):
        """
        Puts a metadata entry in the search index under the given ID.
        """
        entry = flask.request.get_json(force=True)
        if not flask.g.is_admin and entry["orgUUID"] not in flask.g.org_uuid_list:
            self._log.warning('Forbidden access to the organisation')
            self._notify(entry, 'Forbidden access to the organisation')
            return None, 403

        try:
            # Let the logger do the formatting so it is skipped when the
            # INFO level is disabled.
            self._log.info("processed entry: %s", entry)
            self._parser.transform(entry)
        except InvalidEntryError as ex:
            self._log.error(ex.value)
            self._notify(entry, 'Error durning parsing entry')
            abort(400, ex.value)

        return self.add_data_set(entry_id, entry)

    def add_data_set(self, entry_id, entry):
        """
        Indexes `entry` under `entry_id` and sends a notification about the
        outcome.

        Returns a (body, status) pair: 201 when the index response reports
        the document as created, 200 otherwise, 400 on RequestError and 503
        on ConnectionError.
        """
        try:
            response = self._elastic_search.index(
                index=self._config.elastic.elastic_index,
                doc_type=self._config.elastic.elastic_metadata_type,
                id=entry_id,
                body=entry
            )
            self._notify(entry, 'Dataset added')
            if response['created']:
                return None, 201
            else:
                return None, 200
        except RequestError:
            self._log.exception(self.MALFORMED_ERROR_MESSAGE)
            self._notify(entry, self.MALFORMED_ERROR_MESSAGE)
            return None, 400
        except ConnectionError:
            self._log.exception(self.NO_CONNECTION_ERROR_MESSAGE)
            self._notify(entry, self.NO_CONNECTION_ERROR_MESSAGE)
            return None, 503

    def delete(self, entry_id):
        """
        Deletes a metadata entry labeled with the given ID.
        """
        # Fetched once: used for the ownership check and for notifications.
        entry = self._get_entry(entry_id)
        if not flask.g.is_admin and entry["orgUUID"] not in flask.g.org_uuid_list:
            self._log.warning('Forbidden access to the resource')
            return None, 403
        token = self._get_token_from_request()
        if not token:
            return None, 401
        try:
            deletion_status = self._dataset_delete.delete(entry_id, token)
            self._notify(entry, "Dataset deleted")
            return deletion_status, 200
        except NotFoundError:
            self._log.exception('Data set with the given ID not found.')
            self._notify(entry, "Data set with the given ID not found.")
            return None, 404
        except ConnectionError:
            self._log.exception('No connection to the index.')
            self._notify(entry, 'No connection to the index.')
            return None, 503

    def post(self, entry_id):
        """
        Updates specified attributes of metadata entry with the given ID.
        The body of the POST method should be formed in a following way:

        {
            "argumentName": ["value01", "value02"]
        }

        The value of a given argument will replace current value for this argument
        in the specified metadata entry.

        Example:
        {
            "title": "A new, better title for this data set!"
        }
        """
        # Read the entry up front (as `delete` does). It serves the ownership
        # check and the failure notifications below, which therefore do not
        # have to query the index again after it has just failed.
        entry = self._get_entry(entry_id)
        if not flask.g.is_admin and entry["orgUUID"] not in flask.g.org_uuid_list:
            # No exception is being handled here, so log a plain warning.
            self._log.warning('Forbidden access to the resource')
            return None, 403
        exception_message = "Failed to update the data set's attributes."

        body = flask.request.get_json(force=True)
        # Only fields present in CERBERUS_SCHEMA may be updated.
        if not set(body).issubset(CERBERUS_SCHEMA):
            self._log.warning('Request body is invalid. Data: %s', flask.request.data)
            abort(400)
        # The attributes are sent as a partial document under the 'doc' key.
        body_dict = {'doc': body}

        try:
            if 'isPublic' in body:
                token = self._get_token_from_request()
                if not token:
                    # Answer 401 instead of passing a bogus token on.
                    return None, 401
                self._dataset_delete.delete_public_from_hive(entry_id, token)
        except NotFoundError:
            self._log.exception('Data set with the given ID not found.')
            return None, 404
        except ConnectionError:
            self._log.exception('No connection to the index.')
            return None, 503

        try:
            self._elastic_search.update(
                index=self._config.elastic.elastic_index,
                doc_type=self._config.elastic.elastic_metadata_type,
                id=entry_id,
                body=body_dict)
            # Re-read once so the notification reflects the updated entry.
            updated_entry = self._get_entry(entry_id)
            is_public_status_tag = 'public' if updated_entry["isPublic"] else 'private'
            self._notify(updated_entry,
                         "Dataset changed status on",
                         is_public_status_tag)
        except NotFoundError:
            self._log.exception(exception_message)
            self._notify(entry, exception_message)
            abort(404)
        except ConnectionError:
            self._log.exception('No connection to the index.')
            self._notify(entry, 'No connection to the index.')
            return None, 503

        return

    def _notify(self, entry, message, status=""):
        """
        Sends "<sourceUri> - <message> <status>" through the notifier to the
        entry's 'orgUUID'; a missing 'sourceUri' is rendered as empty.
        """
        notify_msg = '{} - {} {}'.format(entry.get('sourceUri', ''), message, status)
        self._notifier.notify(notify_msg, entry['orgUUID'])

    def _get_org_uuid(self, entry_id):
        """Returns the 'orgUUID' field of the stored entry."""
        return self._get_entry(entry_id)["orgUUID"]

    def _get_is_public_status(self, entry_id):
        """Returns the 'isPublic' field of the stored entry."""
        return self._get_entry(entry_id)["isPublic"]

    def _get_entry(self, entry_id):
        """
        Returns the '_source' part of the indexed document with the given ID.

        Aborts the request with 404 when the index reports the document as
        not found and with 503 when the index cannot be reached, matching
        the status codes the public handlers use for these failures.
        """
        try:
            return self._elastic_search.get(
                index=self._config.elastic.elastic_index,
                doc_type=self._config.elastic.elastic_metadata_type,
                id=entry_id)["_source"]

        except NotFoundError:
            self._log.exception("Not found")
            abort(404)
        except ConnectionError:
            self._log.exception('No connection to the index.')
            abort(503)

    def _get_token_from_request(self):
        """
        Returns the value of the request's 'Authorization' header, or None
        (after logging an error) when the header is missing or empty.
        Callers are expected to answer 401 in that case.
        """
        token = flask.request.headers.get('Authorization')
        if not token:
            self._log.error('Authorization header not found.')
            return None
        return token