Exemplo n.º 1
0
    def test_create_tag_non_existent_template_should_not_raise_error(
            self,
            create_fileset_enricher_tag_template,
            get_fileset_enricher_tag_template,
            list_tags,  # noqa: E125
            create_tag):
        """Check tag creation when the tag template lookup fails.

        The template getter is configured to raise ``PermissionDenied``
        and the template creator returns a stub template. After calling
        ``create_tag_from_stats`` each of the four mocks must have been
        called exactly once.

        NOTE(review): the mock arguments are presumably injected by patch
        decorators that are outside this excerpt -- confirm.
        """
        helper = DataCatalogHelper('test_project')

        fileset_entry = MockedObject()
        fileset_entry.name = 'fileset_entry'

        created_template = MockedObject()
        created_template.name = ('projects/my-project/locations/my-location/'
                                 'tagTemplates/fileset_template')

        # No tags exist yet and the template lookup fails.
        list_tags.return_value = []
        get_fileset_enricher_tag_template.side_effect = PermissionDenied(
            'entry does not exist')
        create_fileset_enricher_tag_template.return_value = created_template

        helper.create_tag_from_stats(fileset_entry,
                                     self.__create_full_stats_obj())

        for patched in (create_fileset_enricher_tag_template,
                        get_fileset_enricher_tag_template,
                        list_tags,
                        create_tag):
            patched.assert_called_once()
Exemplo n.º 2
0
    def test_delete_entries_error_on_delete_entry_group_should_not_leak_error(
            self, delete_entry_group, delete_entry,
            search_catalog):  # noqa: E501
        """Check that a failing entry group deletion is not propagated.

        The catalog search returns three stub entries whose resource names
        use two distinct entry group prefixes, and ``delete_entry_group``
        is set to raise. The call under test must still complete, with
        three entry deletions and two entry group deletions attempted.
        """
        helper = DataCatalogHelper('test_project')

        found_entries = []
        for resource_name in ('entry_group/entries/entry_id',
                              'entry_group/entries/entry_id_2',
                              'entry_group_2/entries/entry_id_3'):
            stub = MockedObject()
            stub.name = 'fileset_entry'
            stub.relative_resource_name = resource_name
            found_entries.append(stub)

        search_catalog.return_value = found_entries
        delete_entry_group.side_effect = Exception('error on delete entry')

        helper.delete_entries_and_entry_groups()

        search_catalog.assert_called_once()
        self.assertEqual(3, delete_entry.call_count)
        self.assertEqual(2, delete_entry_group.call_count)
Exemplo n.º 3
0
    def test_create_tag_with_no_files_found_should_not_raise_error(
            self, create_fileset_enricher_tag_template,
            get_fileset_enricher_tag_template, list_tags,
            create_tag):  # noqa: E501
        """Check tag creation from stats that report zero files.

        The template getter returns a stub template, so the template
        creator must not be called; the getter, ``list_tags`` and
        ``create_tag`` must each be called exactly once.
        """
        helper = DataCatalogHelper('test_project')

        fileset_entry = MockedObject()
        fileset_entry.name = 'fileset_entry'

        existing_template = MockedObject()
        existing_template.name = 'fileset_template'

        list_tags.return_value = []
        get_fileset_enricher_tag_template.return_value = existing_template

        # Stats as reported when nothing matched the prefix.
        empty_stats = {
            'prefix': 'gs://my_bucket/*.csv',
            'count': 0,
            'files_by_bucket': '',
            'buckets_found': 0,
            'execution_time': pd.Timestamp.utcnow()
        }

        helper.create_tag_from_stats(fileset_entry, empty_stats)

        create_fileset_enricher_tag_template.assert_not_called()
        for patched in (get_fileset_enricher_tag_template,
                        list_tags,
                        create_tag):
            patched.assert_called_once()
Exemplo n.º 4
0
    def test_create_tag_with_bucket_prefix_should_not_raise_error(
            self,
            create_fileset_enricher_tag_template,
            get_fileset_enricher_tag_template,
            list_tags,  # noqa: E125
            create_tag):  # noqa:E125
        """Check tag creation from full stats that include a bucket prefix.

        The template getter returns a stub template, so the template
        creator must not be called; the getter, ``list_tags`` and
        ``create_tag`` must each be called exactly once.
        """
        helper = DataCatalogHelper('test_project')

        fileset_entry = MockedObject()
        fileset_entry.name = 'fileset_entry'

        existing_template = MockedObject()
        existing_template.name = 'fileset_template'

        list_tags.return_value = []
        get_fileset_enricher_tag_template.return_value = existing_template

        # Start from the shared full stats and add the bucket prefix.
        full_stats = self.__create_full_stats_obj()
        full_stats['bucket_prefix'] = 'my_bucket'

        helper.create_tag_from_stats(fileset_entry, full_stats)

        create_fileset_enricher_tag_template.assert_not_called()
        for patched in (get_fileset_enricher_tag_template,
                        list_tags,
                        create_tag):
            patched.assert_called_once()
Exemplo n.º 5
0
    def test_delete_tag_template_error_should_not_leak(self,
                                                       delete_tag_template):
        """Check that a failing tag template deletion is not propagated.

        ``delete_tag_template`` is set to raise; the helper call must
        still return and the mock must have been called exactly once.
        """
        deletion_failure = Exception('error on delete tag template')
        helper = DataCatalogHelper('test_project')
        delete_tag_template.side_effect = deletion_failure

        helper.delete_tag_template()

        delete_tag_template.assert_called_once()
Exemplo n.º 6
0
    def test_create_tag_providing_tag_fields_with_key_error_should_leak_the_error(
            self,
            create_fileset_enricher_tag_template,
            get_fileset_enricher_tag_template,
            list_tags,  # noqa: E125
            create_tag):  # noqa:E125
        """Check which fields end up in the tag when a field list is given.

        ``create_tag_from_stats`` is called with zero-file stats and the
        requested fields ``files`` and ``prefix``. The tag passed to
        ``create_tag`` must contain those fields plus ``execution_time``
        and none of the other stats fields. Each of the four mocks must
        have been called exactly once.

        NOTE(review): despite the method name, nothing here asserts that
        an error is raised -- confirm whether the name is stale.
        """
        helper = DataCatalogHelper('test_project')

        fileset_entry = MockedObject()
        fileset_entry.name = 'fileset_entry'

        created_template = MockedObject()
        created_template.name = ('projects/my-project/locations/my-location/'
                                 'tagTemplates/fileset_template')

        # No tags exist yet and the template lookup fails.
        list_tags.return_value = []
        get_fileset_enricher_tag_template.side_effect = PermissionDenied(
            'entry does not exist')
        create_fileset_enricher_tag_template.return_value = created_template

        empty_stats = {
            'prefix': 'gs://my_bucket/*.csv',
            'count': 0,
            'files_by_bucket': '',
            'buckets_found': 0,
            'execution_time': pd.Timestamp.utcnow()
        }
        requested_fields = ['files', 'prefix']

        helper.create_tag_from_stats(fileset_entry, empty_stats,
                                     requested_fields)

        # Inspect the keyword arguments of the first create_tag call.
        first_call = create_tag.call_args_list[0]
        tag_fields = MessageToDict(first_call[1]['tag'])['fields']

        self.assertTrue(set(requested_fields).issubset(tag_fields))
        self.assertIn('execution_time', tag_fields)
        for excluded_field in ('files_by_bucket',
                               'buckets_found',
                               'bucket_prefix',
                               'min_file_size',
                               'max_file_size',
                               'avg_file_size',
                               'first_created_date',
                               'last_created_date',
                               'last_updated_date',
                               'created_files_by_day',
                               'updated_files_by_day'):
            self.assertNotIn(excluded_field, tag_fields)

        for patched in (create_fileset_enricher_tag_template,
                        get_fileset_enricher_tag_template,
                        list_tags,
                        create_tag):
            patched.assert_called_once()
Exemplo n.º 7
0
    def test_synchronize_entries_tags_should_do_nothing_when_no_tags_are_provided(
            self, list_tags, create_tag, update_tag):  # noqa: E501
        """Check that synchronizing an empty tag list touches no mock.

        After ``synchronize_entry_tags`` is called with an empty list,
        none of ``create_tag``, ``update_tag`` or ``list_tags`` may have
        been called.
        """
        helper = DataCatalogHelper('test_project')

        fileset_entry = MockedObject()
        fileset_entry.name = 'fileset_entry'
        fileset_entry.relative_resource_name = 'entry_group/entries/entry_id'

        no_tags = []
        helper.synchronize_entry_tags(fileset_entry, no_tags)

        for patched in (create_tag, update_tag, list_tags):
            patched.assert_not_called()
Exemplo n.º 8
0
    def test_create_tag_template_should_create_all_fields(
            self, create_tag_template):
        """Check the display name of every field of the created template.

        The ``tag_template`` keyword argument of the first
        ``create_tag_template`` call is inspected; each expected field id
        must carry the expected display name, and the mock must have been
        called exactly once.
        """
        helper = DataCatalogHelper('test_project')
        helper.create_fileset_enricher_tag_template(
            'projects/my-project/locations/my-location/'
            'tagTemplates/tag_template_name')

        first_call = create_tag_template.call_args_list[0]
        created_template = first_call[1]['tag_template']

        self.assertIsNotNone(created_template)

        # (field id, expected display name), checked in this order.
        expected_display_names = (
            ('files', 'Number of files found'),
            ('min_file_size', 'Minimum file size found in megabytes'),
            ('max_file_size', 'Maximum file size found in megabytes'),
            ('avg_file_size', 'Average file size found in megabytes'),
            ('total_file_size', 'Total file size found in megabytes'),
            ('first_created_date',
             'First time a file was created in the buckets'),
            ('last_created_date',
             'Last time a file was created in the buckets'),
            ('last_updated_date',
             'Last time a file was updated in the buckets'),
            ('created_files_by_day',
             'Number of files created on the same date'),
            ('updated_files_by_day',
             'Number of files updated on the same date'),
            ('prefix', 'Prefix used to find the files'),
            ('bucket_prefix', 'Buckets without this prefix were ignored'),
            ('buckets_found', 'Number of buckets that matches the prefix'),
            ('files_by_bucket',
             'Number of files found on each bucket that matches the prefix'),
            ('files_by_type', 'Number of files found by file type'),
            ('execution_time',
             'Execution time when all stats were collected'),
        )
        for field_id, display_name in expected_display_names:
            self.assertEqual(display_name,
                             created_template.fields[field_id].display_name)

        create_tag_template.assert_called_once()
Exemplo n.º 9
0
    def test_synchronize_entries_tags_should_create_tag_when_tag_is_new(
            self, list_tags, create_tag, update_tag):  # noqa: E501
        """Check that a tag is created when the entry has no tags yet.

        ``list_tags`` returns an empty list, so synchronizing one fake tag
        must call ``create_tag`` exactly once and never ``update_tag``.
        """
        new_tag = self.__make_fake_tag()

        # The entry currently has no tags.
        list_tags.return_value = []

        helper = DataCatalogHelper('test_project')

        fileset_entry = MockedObject()
        fileset_entry.name = 'fileset_entry'
        fileset_entry.relative_resource_name = 'entry_group/entries/entry_id'

        helper.synchronize_entry_tags(fileset_entry, [new_tag])

        create_tag.assert_called_once()
        update_tag.assert_not_called()
Exemplo n.º 10
0
    def test_get_manually_created_fileset_entries_should_return_successfully(
            self, search_catalog):
        """Check the values returned for each entry found by the search.

        The catalog search returns three stub entries. Each of the first
        three results must unpack into three values equal to the
        location, entry group id and entry id segments of the matching
        stub's relative resource name. ``search_catalog`` must have been
        called exactly once.
        """
        helper = DataCatalogHelper('test_project')

        found_entries = []
        for resource_name in (
                'projects/uat-env-1/locations/us-central1/entryGroups/'
                'entry_group_enricher_1/entries/entry_id_enricher_1',
                'projects/uat-env-1/locations/us-central1/entryGroups/'
                'entry_group_enricher_1/entries/entry_id_enricher_2',
                'projects/uat-env-1/locations/us-central1/entryGroups/'
                'entry_group_enricher_2/entries/entry_id_enricher_3'):
            stub = MockedObject()
            stub.name = 'fileset_entry'
            stub.relative_resource_name = resource_name
            found_entries.append(stub)

        search_catalog.return_value = found_entries

        results = helper.get_manually_created_fileset_entries()

        # Unpack every result before asserting anything, so a malformed
        # result fails on unpacking first.
        unpacked_results = []
        for position in range(3):
            location, entry_group_id, entry_id = results[position]
            unpacked_results.append((location, entry_group_id, entry_id))

        expected_results = (
            ('us-central1', 'entry_group_enricher_1', 'entry_id_enricher_1'),
            ('us-central1', 'entry_group_enricher_1', 'entry_id_enricher_2'),
            ('us-central1', 'entry_group_enricher_2', 'entry_id_enricher_3'),
        )
        for expected, returned in zip(expected_results, unpacked_results):
            for expected_part, returned_part in zip(expected, returned):
                self.assertEqual(expected_part, returned_part)

        search_catalog.assert_called_once()
Exemplo n.º 11
0
    def test_synchronize_entries_tags_should_update_tag_on_changes(
            self, list_tags, create_tag, update_tag):
        """Check that an existing tag with a different value is updated.

        ``list_tags`` returns a fake tag whose ``test-double-field`` has
        been set to 2, and a freshly made fake tag is synchronized.
        ``update_tag`` must be called exactly once and ``create_tag``
        never.
        """
        incoming_tag = self.__make_fake_tag()

        # Same fake tag, with one double field overwritten.
        existing_tag = self.__make_fake_tag()
        existing_tag.fields['test-double-field'].double_value = 2
        list_tags.return_value = [existing_tag]

        helper = DataCatalogHelper('test_project')

        fileset_entry = MockedObject()
        fileset_entry.name = 'fileset_entry'
        fileset_entry.relative_resource_name = 'entry_group/entries/entry_id'

        helper.synchronize_entry_tags(fileset_entry, [incoming_tag])

        create_tag.assert_not_called()
        update_tag.assert_called_once()
Exemplo n.º 12
0
    def test_delete_entries_and_entry_groups_should_successfully_delete_them(
            self, delete_entry_group, delete_entry, search_catalog):  # noqa
        """Check the deletion counts for entries and entry groups.

        The catalog search returns three stub entries whose resource names
        use two distinct entry group prefixes. The call under test must
        search once, delete three entries and delete two entry groups.
        """
        helper = DataCatalogHelper('test_project')

        found_entries = []
        for resource_name in ('entry_group/entries/entry_id',
                              'entry_group/entries/entry_id_2',
                              'entry_group_2/entries/entry_id_3'):
            stub = MockedObject()
            stub.name = 'fileset_entry'
            stub.relative_resource_name = resource_name
            found_entries.append(stub)

        search_catalog.return_value = found_entries

        helper.delete_entries_and_entry_groups()

        search_catalog.assert_called_once()
        self.assertEqual(3, delete_entry.call_count)
        self.assertEqual(2, delete_entry_group.call_count)
Exemplo n.º 13
0
    def test_get_tag_template_should_not_raise_error(self, get_tag_template):
        """Check that fetching the tag template calls the mock once."""
        template_name = 'tag_template_name'
        helper = DataCatalogHelper('test_project')

        helper.get_fileset_enricher_tag_template(template_name)

        get_tag_template.assert_called_once()
Exemplo n.º 14
0
    def test_get_entry_should_not_raise_error(self, get_entry):
        """Check that fetching an entry calls the ``get_entry`` mock once."""
        helper = DataCatalogHelper('test_project')
        entry_coordinates = ('uscentral-1', 'test_entry_group', 'testr_entry')

        helper.get_entry(*entry_coordinates)

        get_entry.assert_called_once()
Exemplo n.º 15
0
    def test_delete_tag_template_should_not_raise_error(
            self, delete_tag_template):
        """Check that deleting the tag template calls the mock once."""
        helper = DataCatalogHelper('test_project')

        helper.delete_tag_template()

        delete_tag_template.assert_called_once()
Exemplo n.º 16
0
    def test_delete_tag_should_not_raise_error(self, delete_tag):
        """Check that deleting a tag calls the ``delete_tag`` mock once."""
        helper = DataCatalogHelper('test_project')
        entry_group_id, entry_id = 'entry_group_id', 'entry_id'

        helper.delete_tag(entry_group_id, entry_id)

        delete_tag.assert_called_once()