def setUp(self):
    """Prepare a sample metadata entry, its expected form and the transformer.

    ``test_entry`` is the dict handed to the transformer by the tests;
    ``test_entry_index`` is a separate dict with equal content that serves
    as the expected value.
    """
    super(MetadataEntryTransformationTests, self).setUp()
    self.org_uuid = 'org01'

    sample_target_uri = 'hdfs://6.6.6.6:8200/borker/long-long-hash/9213-154b-a0b9/000000_1'
    sample_entry = {
        self.CATEGORY_FIELD: 'health',
        'dataSample': 'some sample',
        'format': 'csv',
        'recordCount': 13,
        'size': 99999,
        'sourceUri': 'some uri',
        self.TARGET_URI_FIELD: sample_target_uri,
        'title': 'a great title',
        'isPublic': True,
        self.CREATION_TIME_FIELD: '2015-02-13T13:00:00',
        self.ORG_UUID_FIELD: self.org_uuid,
    }

    self.test_entry = sample_entry
    # All values are immutable, so a shallow copy is a fully independent
    # dict: mutating test_entry never touches the expected value.
    self.test_entry_index = dict(sample_entry)
    self.parser = MetadataIndexingTransformer()
class ElasticSearchAdminResource(DataCatalogResource):
    """
    Contains REST endpoint for managing elastic search data
    """

    def __init__(self):
        """
        Create the ElasticSearch client from the configured
        hostname and port, and the transformer applied to incoming entries.
        """
        super(ElasticSearchAdminResource, self).__init__()
        self._elastic_search = Elasticsearch(
            '{}:{}'.format(self._config.elastic.elastic_hostname,
                           self._config.elastic.elastic_port))
        self._parser = MetadataIndexingTransformer()

    def delete(self):
        """
        Delete elastic search index

        Returns ``(None, 403)`` when the caller is not an admin. On success
        nothing is returned explicitly.
        """
        self._log.info('Deleting the ElasticSearch index.')
        if not flask.g.is_admin:
            # Logger.warn is a deprecated alias; warning is the supported name.
            self._log.warning('Deleting index aborted, not enough privileges (admin required)')
            return None, 403
        # ignore=404 is passed so that a missing index is not treated as an
        # error by the client.
        # pylint: disable=unexpected-keyword-arg
        self._elastic_search.indices.delete(
            self._config.elastic.elastic_index, ignore=404)

    def put(self):
        """
        Add all data into elastic search.
        Data that are corrupted are omitted

        The request body is parsed as JSON and iterated entry by entry. Each
        entry is transformed in place and indexed under its ``"id"`` value.
        Entries rejected with InvalidEntryError are logged and skipped.

        Returns ``(None, 403)`` for non-admin callers, ``(None, 400)`` on
        RequestError, ``(None, 503)`` on ConnectionError and ``(None, 200)``
        once every entry has been processed.
        """
        self._log.info("Adding data to elastic search")
        if not flask.g.is_admin:
            # Logger.warn is a deprecated alias; warning is the supported name.
            self._log.warning('Inserting data aborted, not enough privileges (admin required)')
            return None, 403

        data = flask.request.get_json(force=True)
        try:
            for entry in data:
                try:
                    self._parser.transform(entry)
                    self._elastic_search.index(
                        index=self._config.elastic.elastic_index,
                        doc_type=self._config.elastic.elastic_metadata_type,
                        id=entry["id"],
                        body=entry
                    )
                except InvalidEntryError as ex:
                    # A single bad entry must not abort the whole import.
                    self._log.exception(ex)
        except RequestError:
            self._log.exception("Malformed data")
            return None, 400
        except ConnectionError:
            self._log.exception("Failed connection to ElasticSearch")
            return None, 503

        self._log.info("Data added")
        return None, 200
class MetadataEntryTransformationTests(DataCatalogTestCase):
    """Tests of MetadataIndexingTransformer.transform on metadata entries."""

    TEST_DATA_SET_ID = 'whatever-id'
    EXAMPLE_CATEGORIES = {'health', 'finance'}

    # Names of the entry fields that the tests below manipulate.
    CREATION_TIME_FIELD = 'creationTime'
    CATEGORY_FIELD = 'category'
    TARGET_URI_FIELD = 'targetUri'
    ORG_UUID_FIELD = 'orgUUID'

    def setUp(self):
        """Build a valid sample entry, its expected form and the transformer."""
        super(MetadataEntryTransformationTests, self).setUp()
        self.org_uuid = 'org01'
        self.test_entry = {
            self.CATEGORY_FIELD: 'health',
            'dataSample': 'some sample',
            'format': 'csv',
            'recordCount': 13,
            'size': 99999,
            'sourceUri': 'some uri',
            self.TARGET_URI_FIELD: 'hdfs://6.6.6.6:8200/borker/long-long-hash/9213-154b-a0b9/000000_1',
            'title': 'a great title',
            'isPublic': True,
            self.CREATION_TIME_FIELD: '2015-02-13T13:00:00',
            self.ORG_UUID_FIELD: self.org_uuid
        }
        # The expected entry has the same content as the input; it is kept as
        # a separate dict so that changes made to test_entry do not leak into
        # the expected value. All values are immutable, so a shallow copy is
        # enough.
        self.test_entry_index = dict(self.test_entry)
        self.parser = MetadataIndexingTransformer()

    def test_entryTransformation_validEntry_entryTransformed(self):
        """A valid entry equals the expected entry after transformation."""
        # The return value of transform is not used: the entry itself is
        # compared afterwards.
        self.parser.transform(self.test_entry)
        self.assertDictEqual(
            self.test_entry_index,
            self.test_entry)

    def test_entryTransformation_invalidEntryURIs_raisesInvalidEntryError(self):
        """Each of the listed target URIs is rejected with InvalidEntryError."""
        def check_raises_for_url(url):
            self.test_entry[self.TARGET_URI_FIELD] = url
            self.assertRaises(InvalidEntryError, self.parser.transform, self.test_entry)

        check_raises_for_url('//onet.pl/')
        check_raises_for_url('hdfs://onet.pl/')
        check_raises_for_url('http://')
        check_raises_for_url('some_path')

    def test_entryTransformation_invalidEntryMissingField_raisesInvalidEntryError(self):
        """An entry without 'dataSample' is rejected with InvalidEntryError."""
        del self.test_entry['dataSample']
        self.assertRaises(InvalidEntryError, self.parser.transform, self.test_entry)

    def test_entryTransformation_missingDate_dateCreated(self):
        """An entry without a creation time has that field after transformation."""
        del self.test_entry[self.CREATION_TIME_FIELD]
        self.parser.transform(self.test_entry)
        # assertIn reports the missing key and the container on failure,
        # unlike assertTrue on a direct __contains__ call.
        self.assertIn(self.CREATION_TIME_FIELD, self.test_entry)
def __init__(self):
    """Set up the ElasticSearch client and the entry transformer.

    The client address is formatted as ``hostname:port`` from the
    ``elastic`` section of the resource configuration.
    """
    super(ElasticSearchAdminResource, self).__init__()
    elastic_settings = self._config.elastic
    address = '{}:{}'.format(
        elastic_settings.elastic_hostname,
        elastic_settings.elastic_port,
    )
    self._elastic_search = Elasticsearch(address)
    self._parser = MetadataIndexingTransformer()