def test_set_string_field_should_truncate_uft8_bytes_size_mixed(self):
    """
    - Input string: 1990 'a' chars + 10 'ã' chars;
    - Expected field value: '1990 "a" chars + 3 "ã" chars + ...' since
      each 'ã' char needs 2 bytes, while 'a' chars and periods need
      1 byte when encoded in UTF-8.
    """
    ascii_prefix = 'a' * 1990
    input_value = u'{}{}'.format(ascii_prefix, u'ã' * 10)
    expected_value = u'{}{}...'.format(ascii_prefix, u'ã' * 3)

    tag = types.Tag()
    self.__base_tag_factory._set_string_field(tag, 'string', input_value)

    stored_value = tag.fields['string'].string_value
    # 1990 + 3 + 3 characters, which encode to 1990 + 6 + 3 bytes.
    self.assertEqual(1996, len(stored_value))
    self.assertEqual(expected_value, stored_value)
    self.assertEqual(1999, len(stored_value.encode('UTF-8')))
def make_tag_for_entity(self, entity, entity_types_dict, enum_types_dict):
    """Assemble the Tag that describes a single entity.

    The entity type is looked up in ``entity_types_dict`` by the entity's
    ``typeName``. The normalized type name and the type's version are
    used to build the id of the Tag Template the Tag points to.

    Args:
        entity: mapping with ``guid`` and ``data`` keys; ``data`` must
            provide ``typeName`` and ``attributes``.
        entity_types_dict: mapping from type name to an entity type whose
            ``data`` provides ``version`` and ``attributeDefs``.
        enum_types_dict: forwarded unchanged to the attribute-processing
            helper.

    Returns:
        The populated ``types.Tag``.
    """
    normalizer = attr_normalizer.DataCatalogAttributeNormalizer
    entity_tag = types.Tag()

    entity_guid = entity['guid']
    entity_data = entity['data']
    type_name = entity_data['typeName']
    type_definition = entity_types_dict[type_name]

    normalized_name = normalizer.format_name(type_name)
    template_id = normalizer.create_tag_template_id(
        normalized_name, constant.ENTITY_TYPE_PREFIX,
        type_definition['data']['version'])
    entity_tag.template = datacatalog.DataCatalogClient.tag_template_path(
        project=self.__project_id,
        location=self.__location_id,
        tag_template=template_id)

    entity_attributes = entity_data['attributes']

    # Fixed fields: a boolean flag named after the entity type, the
    # entity GUID and the instance URL.
    base_factory = super()
    base_factory._set_bool_field(entity_tag, normalized_name, True)
    base_factory._set_string_field(entity_tag, constant.ENTITY_GUID,
                                   entity_guid)
    base_factory._set_string_field(entity_tag, constant.INSTANCE_URL_FIELD,
                                   self.__instance_url)

    # Remaining fields come from the entity attributes.
    self.__add_fields_from_attributes(
        entity_tag, entity_attributes,
        type_definition['data']['attributeDefs'], enum_types_dict)
    self.__create_custom_fields_for_entity_type(entity_tag, type_name,
                                                entity_attributes)

    return entity_tag
def make_tag_for_workbook(cls, tag_template, workbook_metadata):
    """Build a Tag for a workbook from its metadata mapping.

    The site and owner fields are only written when the corresponding
    ``site`` / ``owner`` entries are present and truthy. The two
    upstream-table fields are only written when
    ``make_upstream_tables_field_value`` returns a truthy value.

    Args:
        tag_template: object whose ``name`` becomes the Tag's template.
        workbook_metadata: mapping read through ``.get()``.

    Returns:
        The populated ``types.Tag``.
    """
    workbook_tag = types.Tag()
    workbook_tag.template = tag_template.name

    set_string = super()._set_string_field

    set_string(workbook_tag, 'luid', workbook_metadata.get('luid'))

    site_metadata = workbook_metadata.get('site')
    if site_metadata:
        set_string(workbook_tag, 'site_name', site_metadata.get('name'))

    set_string(workbook_tag, 'project_name',
               workbook_metadata.get('projectName'))

    owner_metadata = workbook_metadata.get('owner')
    if owner_metadata:
        set_string(workbook_tag, 'owner_username',
                   owner_metadata.get('username'))
        set_string(workbook_tag, 'owner_name', owner_metadata.get('name'))

    upstream_table_names = cls.make_upstream_tables_field_value(
        workbook_metadata)
    if upstream_table_names:
        # The definition field documents the layout of each entry in
        # the comma-separated 'upstream_tables' field.
        set_string(workbook_tag, 'upstream_table_definition',
                   'DATABASE NAME (CONNECTION TYPE) / TABLE NAME')
        set_string(workbook_tag, 'upstream_tables',
                   ', '.join(upstream_table_names))

    return workbook_tag
def make_tag_for_folder(self, tag_template, folder):
    """Build a Tag for a folder.

    Args:
        tag_template: object whose ``name`` becomes the Tag's template.
        folder: object exposing ``id``, ``name``, ``parent_id`` and
            ``child_count``; ``dashboards`` and ``looks`` are optional
            attributes.

    Returns:
        The populated ``types.Tag``.
    """
    tag = types.Tag()
    tag.template = tag_template.name

    super()._set_string_field(tag, 'id', folder.id)
    super()._set_string_field(tag, 'name', folder.name)
    super()._set_string_field(tag, 'parent_id', folder.parent_id)

    # Comparing None with 0 raises TypeError on Python 3, so a missing
    # child count is reported as "no children" instead of crashing.
    # NOTE(review): assumes child_count may be None -- confirm against
    # the folder model in use.
    child_count = folder.child_count
    has_children = child_count is not None and child_count > 0
    super()._set_bool_field(tag, 'has_children', has_children)
    super()._set_double_field(tag, 'children_count', child_count)

    # 'dashboards' and 'looks' may be absent from the object; an absent
    # attribute is treated like an empty collection.
    dashboards = getattr(folder, 'dashboards', None)
    has_dashboards = bool(dashboards)
    super()._set_bool_field(tag, 'has_dashboards', has_dashboards)
    super()._set_double_field(tag, 'dashboards_count',
                              len(dashboards) if has_dashboards else 0)

    looks = getattr(folder, 'looks', None)
    has_looks = bool(looks)
    super()._set_bool_field(tag, 'has_looks', has_looks)
    super()._set_double_field(tag, 'looks_count',
                              len(looks) if has_looks else 0)

    super()._set_string_field(tag, 'instance_url', self.__instance_url)

    return tag
def make_tag_for_dashboard_element(self, tag_template, element, dashboard):
    """Build a Tag for a dashboard element.

    Args:
        tag_template: object whose ``name`` becomes the Tag's template.
        element: the dashboard element being described.
        dashboard: the owning dashboard; when falsy, no
            ``dashboard_title`` field is written.

    Returns:
        The populated ``types.Tag``.
    """
    element_tag = types.Tag()
    element_tag.template = tag_template.name

    base_factory = super()
    set_string = base_factory._set_string_field
    set_double = base_factory._set_double_field

    set_string(element_tag, 'id', element.id)
    set_string(element_tag, 'type', element.type)
    set_string(element_tag, 'dashboard_id', element.dashboard_id)
    if dashboard:
        set_string(element_tag, 'dashboard_title', dashboard.title)

    set_string(element_tag, 'look_id', element.look_id)
    if element.look:
        set_string(element_tag, 'look_title', element.look.title)

    set_string(element_tag, 'lookml_link_id', element.lookml_link_id)

    # Prefer the element's own query id; otherwise fall back to the one
    # held by its result maker, if there is one.
    if element.query_id:
        set_double(element_tag, 'query_id', element.query_id)
    elif element.result_maker:
        set_double(element_tag, 'query_id', element.result_maker.query_id)

    set_string(element_tag, 'instance_url', self.__instance_url)

    return element_tag
def make_tag_for_dashboard(self, tag_template, dashboard):
    """Build a Tag for a dashboard.

    Args:
        tag_template: object whose ``name`` becomes the Tag's template.
        dashboard: the dashboard being described; its ``space`` attribute
            supplies the folder id and name.

    Returns:
        The populated ``types.Tag``.
    """
    dashboard_tag = types.Tag()
    dashboard_tag.template = tag_template.name

    base_factory = super()
    set_string = base_factory._set_string_field
    set_bool = base_factory._set_bool_field
    set_double = base_factory._set_double_field
    set_timestamp = base_factory._set_timestamp_field

    set_string(dashboard_tag, 'id', dashboard.id)
    set_string(dashboard_tag, 'description', dashboard.description)

    folder = dashboard.space
    set_string(dashboard_tag, 'folder_id', folder.id)
    set_string(dashboard_tag, 'folder_name', folder.name)

    set_bool(dashboard_tag, 'is_hidden', dashboard.hidden)
    set_double(dashboard_tag, 'user_id', dashboard.user_id)

    # Usage statistics.
    set_double(dashboard_tag, 'view_count', dashboard.view_count)
    set_double(dashboard_tag, 'favorite_count', dashboard.favorite_count)
    set_timestamp(dashboard_tag, 'last_accessed_at',
                  dashboard.last_accessed_at)
    set_timestamp(dashboard_tag, 'last_viewed_at', dashboard.last_viewed_at)

    # Deletion status.
    set_bool(dashboard_tag, 'is_deleted', dashboard.deleted)
    set_timestamp(dashboard_tag, 'deleted_at', dashboard.deleted_at)
    set_double(dashboard_tag, 'deleter_id', dashboard.deleter_id)

    set_string(dashboard_tag, 'instance_url', self.__instance_url)

    return dashboard_tag
def test_set_timestamp_field_should_set_given_value(self):
    """The stored timestamp must match the given datetime, in seconds."""
    moment = dateutil.parser.isoparse('2019-09-12T16:30:00+0000')
    expected_seconds = int(calendar.timegm(moment.utctimetuple()))

    tag = types.Tag()
    self.__base_tag_factory._set_timestamp_field(tag, 'timestamp-field',
                                                 moment)

    stored_timestamp = tag.fields['timestamp-field'].timestamp_value
    self.assertEqual(expected_seconds, stored_timestamp.seconds)
def __mock_make_tag_classification(cls, classification, *args):
    """Return a fake Tag whose template encodes the classification type.

    The template is ``fake_template/<typeName>``; when more than one
    extra positional argument is passed, ``/column`` is appended.
    """
    base_template = 'fake_template/{}'.format(classification['typeName'])
    has_several_extra_args = len(args) > 1

    fake_tag = types.Tag()
    fake_tag.template = ('{}/column'.format(base_template)
                         if has_several_extra_args else base_template)
    return fake_tag
def make_tag(cls, tag_template, fields_dict, column=None):
    """Build a Tag from a template and a dictionary of field values.

    Args:
        tag_template: object whose ``name`` becomes the Tag's template;
            also forwarded to the field-setting helper.
        fields_dict: forwarded to the field-setting helper.
        column: when truthy, stored in the Tag's ``column`` attribute.

    Returns:
        The populated ``types.Tag``.
    """
    new_tag = types.Tag()
    new_tag.template = tag_template.name

    # A falsy column (None, '') leaves the Tag's column untouched.
    if column:
        new_tag.column = column

    cls.__set_tag_fields(new_tag, tag_template, fields_dict)
    return new_tag
def make_fake_tag():
    """Return a Tag with one field of each kind, for use in tests.

    The Tag carries a bool, a double, a string, a timestamp and an enum
    field, and points to the ``test-template`` template.
    """
    fake_tag = types.Tag()
    fake_tag.template = 'test-template'

    fields = fake_tag.fields
    fields['test-bool-field'].bool_value = True
    fields['test-double-field'].double_value = 1
    fields['test-string-field'].string_value = 'Test String Value'
    fields['test-timestamp-field'].timestamp_value.FromJsonString(
        '2019-10-15T01:00:00-03:00')
    fields['test-enum-field'].enum_value.display_name = 'Test ENUM Value'

    return fake_tag
def create_tag(self, entry, tag_template, fields_descriptors):
    """Create a Tag.

    Args:
        entry: object whose ``name`` is used as the parent of the Tag.
        tag_template: object whose ``name`` becomes the Tag's template.
        fields_descriptors: iterable of mappings, each providing ``id``,
            ``value`` and ``primitive_type``.

    Returns:
        Whatever the underlying client's ``create_tag`` call returns.
    """
    new_tag = types.Tag()
    new_tag.template = tag_template.name

    for descriptor in fields_descriptors:
        target_field = new_tag.fields[descriptor['id']]
        self.__set_tag_field_value(target_field, descriptor['value'],
                                   descriptor['primitive_type'])

    return self.__datacatalog.create_tag(parent=entry.name, tag=new_tag)
def __make_fake_tag(cls, string_fields=None, double_fields=None):
    """Return a fake Tag populated with the given string/double fields.

    Args:
        string_fields: optional iterable of indexable pairs; item 0 is
            the field id and item 1 the string value.
        double_fields: optional iterable of indexable pairs; item 0 is
            the field id and item 1 the double value.
    """
    fake_tag = types.Tag()

    # A falsy argument (None or empty) contributes no fields.
    for entry in string_fields or ():
        fake_tag.fields[entry[0]].string_value = entry[1]

    for entry in double_fields or ():
        fake_tag.fields[entry[0]].double_value = entry[1]

    return fake_tag
def __create_tag(cls):
    """Return a named fake Tag with one field of each kind.

    The Tag carries a bool, a double, a string, a timestamp and an enum
    field.
    """
    fake_tag = types.Tag()
    fake_tag.name = 'tag_template'
    fake_tag.template = 'template'

    fields = fake_tag.fields
    fields['bool-field'].bool_value = True
    fields['double-field'].double_value = 1
    fields['string-field'].string_value = 'Test String Value'
    fields['timestamp-field'].timestamp_value.FromJsonString(
        '2019-09-06T11:00:00-03:00')
    fields['enum-field'].enum_value.display_name = 'Test ENUM Value'

    return fake_tag
def make_tag_for_query(self, tag_template, assembled_query_metadata):
    """Build a Tag for a query and its related metadata.

    Args:
        tag_template: object whose ``name`` becomes the Tag's template.
        assembled_query_metadata: object exposing ``query``,
            ``generated_sql``, ``model_explore`` and ``connection``.
            Fields derived from ``model_explore`` and ``connection`` are
            only written when those attributes are truthy.

    Returns:
        The populated ``types.Tag``.
    """
    query_tag = types.Tag()
    query_tag.template = tag_template.name

    query = assembled_query_metadata.query
    sql_text = assembled_query_metadata.generated_sql
    explore = assembled_query_metadata.model_explore
    db_connection = assembled_query_metadata.connection

    base_factory = super()
    set_string = base_factory._set_string_field
    set_double = base_factory._set_double_field

    set_double(query_tag, 'id', query.id)

    # List-valued attributes are flattened into one delimited string
    # each; empty or missing lists produce no field. The field id
    # matches the attribute name.
    for list_attribute in ('fields', 'pivots', 'sorts'):
        elements = getattr(query, list_attribute)
        if elements:
            joined = self.__STRING_VALUE_ARRAY_ELEM_SEP.join(elements)
            set_string(query_tag, list_attribute, joined)

    set_double(query_tag, 'runtime', query.runtime)
    set_string(query_tag, 'client_id', query.client_id)
    set_string(query_tag, 'query_timezone', query.query_timezone)
    set_string(query_tag, 'lookml_model', query.model)
    set_string(query_tag, 'explore_name', query.view)
    set_string(query_tag, 'sql', sql_text)

    if explore:
        set_string(query_tag, 'lookml_project', explore.project_name)
        set_string(query_tag, 'connection', explore.connection_name)

    if db_connection:
        set_string(query_tag, 'host', db_connection.host)
        set_string(query_tag, 'database', db_connection.database)
        set_string(query_tag, 'connection_dialect',
                   db_connection.dialect_name)
        set_string(query_tag, 'connection_username', db_connection.username)

    set_string(query_tag, 'instance_url', self.__instance_url)

    return query_tag
def test_set_string_field_should_truncate_uft8_bytes_size(self):
    """
    - Input string: 2001 'a' chars;
    - Expected field value: '1997 "a" chars + ...' since each char needs
      1 byte when encoded in UTF-8.
    """
    expected_value = '{}...'.format('a' * 1997)

    tag = types.Tag()
    self.__base_tag_factory._set_string_field(tag, 'string', 'a' * 2001)

    stored_value = tag.fields['string'].string_value
    self.assertEqual(2000, len(stored_value))
    self.assertEqual(expected_value, stored_value)
    self.assertEqual(2000, len(stored_value.encode('UTF-8')))
def make_tag_for_column_ref(self, column_guid, column_name):
    """Build a column-level Tag that stores a column's GUID.

    The Tag points to the template whose id is built from ``'ref'`` and
    ``constant.COLUMN_PREFIX``, and is attached to the column named by
    the normalized form of ``column_name``.

    Args:
        column_guid: value stored in the ``column_guid`` field.
        column_name: raw column name, normalized before being assigned
            to the Tag's ``column``.

    Returns:
        The populated ``types.Tag``.
    """
    normalizer = attr_normalizer.DataCatalogAttributeNormalizer
    ref_tag = types.Tag()

    template_id = normalizer.create_tag_template_id(
        'ref', constant.COLUMN_PREFIX)
    ref_tag.template = datacatalog.DataCatalogClient.tag_template_path(
        project=self.__project_id,
        location=self.__location_id,
        tag_template=template_id)

    super()._set_string_field(ref_tag, 'column_guid', column_guid)
    ref_tag.column = normalizer.format_name(column_name)

    return ref_tag
def make_tag_for_sheet(cls, tag_template, sheet_metadata,
                       workbook_metadata):
    """Build a Tag for a sheet from its own and its workbook's metadata.

    A workbook without a ``site`` entry yields a Tag with no
    ``site_name`` field instead of raising ``AttributeError``.

    Args:
        tag_template: object whose ``name`` becomes the Tag's template.
        sheet_metadata: mapping providing ``id``, ``luid`` and,
            optionally, ``path``.
        workbook_metadata: mapping providing ``luid``, ``name`` and,
            optionally, ``site`` (itself a mapping with ``name``).

    Returns:
        The populated ``types.Tag``.
    """
    tag = types.Tag()
    tag.template = tag_template.name

    super()._set_string_field(tag, 'id', sheet_metadata.get('id'))
    super()._set_string_field(tag, 'luid', sheet_metadata.get('luid'))

    super()._set_string_field(tag, 'workbook_luid',
                              workbook_metadata.get('luid'))
    super()._set_string_field(tag, 'workbook_name',
                              workbook_metadata.get('name'))

    # 'site' may be missing or None; chaining .get() on it would fail.
    site = workbook_metadata.get('site')
    if site:
        super()._set_string_field(tag, 'site_name', site.get('name'))

    # True when a 'path' key exists and its value is not the empty
    # string.
    has_external_url = ('path' in sheet_metadata and
                        sheet_metadata.get('path') != '')
    super()._set_bool_field(tag, 'has_external_url', has_external_url)

    return tag
def tag(table_entry, tag_template, scope='function'):
    """Create a Tag on ``table_entry``, yield it, then delete it.

    The Tag carries a boolean, a double, a string, a timestamp and an
    enum field. ``scope`` is accepted but not used by this body.

    Yields:
        The Tag returned by ``datacatalog_client.create_tag``.
    """
    new_tag = types.Tag()
    new_tag.template = tag_template.name

    fields = new_tag.fields
    fields['boolean_field'].bool_value = True
    fields['double_field'].double_value = 10.5
    fields['string_field'].string_value = 'test'
    fields['timestamp_field'].timestamp_value.FromJsonString(
        '2019-07-04T01:00:30Z')
    fields['enum_field'].enum_value.display_name = 'VALUE 1'

    created_tag = datacatalog_client.create_tag(parent=table_entry.name,
                                                tag=new_tag)
    # Wait a few seconds for Data Catalog's search index sync/update.
    time.sleep(2)

    yield created_tag

    datacatalog_client.delete_tag(created_tag.name)
    # Wait a few seconds for Data Catalog's search index sync/update.
    time.sleep(2)
def make_tag_for_classification(self,
                                entity_classification,
                                classifications,
                                enum_types_dict,
                                column_name=None):
    """Build a Tag for a classification applied to an entity.

    Args:
        entity_classification: mapping with ``typeName`` and, optionally,
            ``attributes``.
        classifications: mapping from classification name to a
            classification whose ``data`` provides ``attributeDefs`` and,
            optionally, ``version``.
        enum_types_dict: forwarded unchanged to the attribute-processing
            helper.
        column_name: when truthy, its normalized form is assigned to the
            Tag's ``column``.

    Returns:
        The populated ``types.Tag``.
    """
    normalizer = attr_normalizer.DataCatalogAttributeNormalizer
    classification_tag = types.Tag()

    type_name = entity_classification['typeName']
    definition = classifications[type_name]['data']

    normalized_name = normalizer.format_name(type_name)
    template_id = normalizer.create_tag_template_id(
        normalized_name, constant.CLASSIFICATION_PREFIX,
        definition.get('version'))
    classification_tag.template = \
        datacatalog.DataCatalogClient.tag_template_path(
            project=self.__project_id,
            location=self.__location_id,
            tag_template=template_id)

    # Boolean flag named after the classification itself.
    super()._set_bool_field(classification_tag, normalized_name, True)

    applied_attributes = entity_classification.get('attributes')
    attribute_definitions = definition['attributeDefs']
    if applied_attributes:
        self.__add_fields_from_attributes(classification_tag,
                                          applied_attributes,
                                          attribute_definitions,
                                          enum_types_dict)

    if column_name:
        classification_tag.column = normalizer.format_name(column_name)

    return classification_tag
def make_tag_for_look(self, tag_template, look):
    """Build a Tag for a look.

    Args:
        tag_template: object whose ``name`` becomes the Tag's template.
        look: the look being described; its ``space`` attribute supplies
            the folder id and name, and ``url`` is optional.

    Returns:
        The populated ``types.Tag``.
    """
    look_tag = types.Tag()
    look_tag.template = tag_template.name

    base_factory = super()
    set_string = base_factory._set_string_field
    set_bool = base_factory._set_bool_field
    set_double = base_factory._set_double_field
    set_timestamp = base_factory._set_timestamp_field

    set_double(look_tag, 'id', look.id)
    set_string(look_tag, 'description', look.description)

    folder = look.space
    set_string(look_tag, 'folder_id', folder.id)
    set_string(look_tag, 'folder_name', folder.name)

    set_bool(look_tag, 'is_public', look.public)
    set_double(look_tag, 'user_id', look.user_id)
    set_double(look_tag, 'last_updater_id', look.last_updater_id)
    set_double(look_tag, 'query_id', look.query_id)

    # Some objects, such as LookWithDashboards instances, don't have
    # the url attribute.
    if hasattr(look, 'url'):
        set_string(look_tag, 'url', look.url)

    set_string(look_tag, 'short_url', look.short_url)
    set_string(look_tag, 'public_url', look.public_url)
    set_string(look_tag, 'excel_file_url', look.excel_file_url)
    set_string(look_tag, 'google_spreadsheet_formula',
               look.google_spreadsheet_formula)

    # Usage statistics.
    set_double(look_tag, 'view_count', look.view_count)
    set_double(look_tag, 'favorite_count', look.favorite_count)
    set_timestamp(look_tag, 'last_accessed_at', look.last_accessed_at)
    set_timestamp(look_tag, 'last_viewed_at', look.last_viewed_at)

    # Deletion status.
    set_bool(look_tag, 'is_deleted', look.deleted)
    set_timestamp(look_tag, 'deleted_at', look.deleted_at)
    set_double(look_tag, 'deleter_id', look.deleter_id)

    set_string(look_tag, 'instance_url', self.__instance_url)

    return look_tag
def __mock_make_tag(cls, entity, *_):
    """Return a fake Tag whose template encodes the entity's type name.

    Extra positional arguments are accepted and ignored.
    """
    fake_tag = types.Tag()
    type_name = entity['data']['typeName']
    fake_tag.template = 'fake_template/entity_type/{}'.format(type_name)
    return fake_tag
def __mock_make_tag_parent_dep(cls, tag_template_dict, asset, parent):
    """Return a fake Tag pointing to ``tag_template_dict['name']``.

    ``asset`` and ``parent`` are accepted only to match the mocked
    signature; they are not used.
    """
    fake_tag = types.Tag()
    template_name = tag_template_dict['name']
    fake_tag.template = template_name
    return fake_tag
def test_set_timestamp_field_should_skip_none_value(self):
    """A ``None`` timestamp must not add a field to the Tag."""
    field_id = 'timestamp-field'
    tag = types.Tag()

    self.__base_tag_factory._set_timestamp_field(tag, field_id, None)

    self.assertNotIn(field_id, tag.fields)
def test_set_double_field_should_skip_none_value(self):
    """A ``None`` double must not add a field to the Tag."""
    field_id = 'double'
    tag = types.Tag()

    self.__base_tag_factory._set_double_field(tag, field_id, None)

    self.assertNotIn(field_id, tag.fields)
def test_set_double_field_should_set_given_value(self):
    """A non-None double must be stored in the named field."""
    field_id = 'double'
    given_value = 2.5
    tag = types.Tag()

    self.__base_tag_factory._set_double_field(tag, field_id, given_value)

    self.assertEqual(given_value, tag.fields[field_id].double_value)
def test_set_string_field_should_skip_empty_value(self):
    """An empty string must not add a field to the Tag."""
    field_id = 'string'
    tag = types.Tag()

    self.__base_tag_factory._set_string_field(tag, field_id, '')

    self.assertNotIn(field_id, tag.fields)
def __mock_make_tag_for_column_ref(cls, column_guid, _):
    """Return a fake column-ref Tag whose template encodes the GUID.

    The second positional argument is accepted and ignored.
    """
    fake_tag = types.Tag()
    template_name = 'fake_template/{}/column_ref'.format(column_guid)
    fake_tag.template = template_name
    return fake_tag
def test_set_bool_field_should_set_given_value(self):
    """An explicit ``False`` must be stored in the named field.

    ``False`` is also the default of an unset ``bool_value``, so the
    field's presence is asserted in addition to its value.
    """
    tag = types.Tag()

    self.__base_tag_factory._set_bool_field(tag, 'bool', False)

    # Check presence first: reading a missing key from a protobuf map
    # field creates a default entry, so assertFalse alone would also
    # pass if nothing had been set.
    self.assertIn('bool', tag.fields)
    self.assertFalse(tag.fields['bool'].bool_value)