def test_get_table_by_user_relation(self, mock_rds_client: Any) -> None:
    """
    Check that ``get_table_by_user_relation`` returns exactly one
    ``PopularTable`` under the ``'table'`` key for the single table
    handed back by the stubbed session.
    """
    # Fixture: a table attached to its schema -> cluster -> database chain.
    rds_database = RDSDatabase(name='test_database')
    rds_cluster = RDSCluster(name='test_cluster')
    rds_schema = RDSSchema(name='test_schema')
    rds_table = RDSTable(name='test_table')
    rds_description = RDSTableDescription(description='test_description')
    rds_cluster.database = rds_database
    rds_schema.cluster = rds_cluster
    rds_table.schema = rds_schema
    rds_table.description = rds_description

    # Stub the query chain bottom-up:
    # session.query(...).filter(...).options(...).all() -> [rds_table]
    options_stub = MagicMock()
    options_stub.all.return_value = [rds_table]
    filter_stub = MagicMock()
    filter_stub.options.return_value = options_stub
    query_stub = MagicMock()
    query_stub.filter.return_value = filter_stub
    session_stub = MagicMock()
    session_stub.query.return_value = query_stub

    # The object returned by create_session() hands out the stubbed
    # session from its __enter__.
    session_context = MagicMock()
    session_context.__enter__.return_value = session_stub
    client_stub = MagicMock()
    client_stub.create_session.return_value = session_context
    mock_rds_client.return_value = client_stub

    expected = {
        'table': [
            PopularTable(database='test_database',
                         cluster='test_cluster',
                         schema='test_schema',
                         name='test_table',
                         description='test_description')
        ]
    }

    actual = MySQLProxy().get_table_by_user_relation(
        user_email='test_user',
        relation_type=UserResourceRel.follow)

    self.assertEqual(len(actual['table']), 1)
    self.assertEqual(expected, actual)
def test_get_dashboard(self, mock_rds_client: Any) -> None:
    """
    Check that ``get_dashboard`` turns the stubbed dashboard, query and
    table records into the expected ``DashboardDetail``.

    The stubbed session answers ``query().filter().first()`` with the
    dashboard record, and successive ``query().filter().options().all()``
    calls with the dashboard queries first and the tables second.
    """
    # dashboard_metadata
    dashboard = RDSDashboard(rk='foo_dashboard://gold.bar/dashboard_id',
                             name='dashboard name',
                             dashboard_url='http://www.foo.bar/dashboard_id',
                             created_timestamp=123456789)
    dashboard_group = RDSDashboardGroup(
        name='group_name',
        dashboard_group_url='http://www.group_url.com')
    # A dashboard group belongs to a dashboard cluster, not a table cluster;
    # use the same model as the dashboard fixture in
    # test_get_popular_resources.
    dashboard_group.cluster = RDSDashboardCluster(name='cluster_name')
    dashboard.group = dashboard_group
    dashboard.description = RDSDashboardDescription(description='description')
    dashboard.execution = [
        RDSDashboardExecution(rk='dashboard_last_successful_execution',
                              timestamp=9876543210),
        RDSDashboardExecution(rk='dashboard_last_execution',
                              timestamp=987654321,
                              state='good_state'),
    ]
    dashboard.timestamp = RDSDashboardTimestamp(timestamp=123456654321)
    dashboard.tags = [
        RDSTag(rk='tag_key1', tag_type='default'),
        RDSTag(rk='tag_key2', tag_type='default'),
    ]
    dashboard.badges = [RDSBadge(rk='golden', category='table_status')]
    dashboard.owners = [
        RDSUser(email='test_email',
                first_name='test_first_name',
                last_name='test_last_name',
                full_name='test_full_name',
                is_active=True,
                github_username='******',
                team_name='test_team',
                slack_id='test_id',
                employee_type='teamMember'),
        RDSUser(email='test_email2',
                first_name='test_first_name2',
                last_name='test_last_name2',
                full_name='test_full_name2',
                is_active=True,
                github_username='******',
                team_name='test_team2',
                slack_id='test_id2',
                employee_type='teamMember'),
    ]
    dashboard.usage = [RDSDashboardUsage(read_count=100)]

    # Stub client -> create_session() -> __enter__ -> session -> query -> filter.
    mock_client = MagicMock()
    mock_rds_client.return_value = mock_client

    mock_create_session = MagicMock()
    mock_client.create_session.return_value = mock_create_session

    mock_session = MagicMock()
    mock_create_session.__enter__.return_value = mock_session

    mock_session_query = MagicMock()
    mock_session.query.return_value = mock_session_query

    mock_session_query_filter = MagicMock()
    mock_session_query.filter.return_value = mock_session_query_filter

    mock_session_query_filter.first.return_value = dashboard

    # queries
    query1 = RDSDashboardQuery(name='query1')
    query2 = RDSDashboardQuery(name='query2',
                               url='http://foo.bar/query',
                               query_text='SELECT * FROM foo.bar')
    query1.charts = [RDSDashboardChart(name='chart1')]
    query2.charts = [RDSDashboardChart(name='chart2')]
    queries = [query1, query2]

    # tables
    database1 = RDSDatabase(name='db1')
    database2 = RDSDatabase(name='db2')
    cluster1 = RDSCluster(name='cluster1')
    cluster2 = RDSCluster(name='cluster2')
    schema1 = RDSSchema(name='schema1')
    schema2 = RDSSchema(name='schema2')
    table1 = RDSTable(name='table1')
    table2 = RDSTable(name='table2')
    description1 = RDSTableDescription(description='table description 1')

    schema1.cluster = cluster1
    cluster1.database = database1
    schema2.cluster = cluster2
    cluster2.database = database2
    table1.schema = schema1
    table2.schema = schema2
    # Only table1 gets a description; table2 is left without one.
    table1.description = description1
    tables = [table1, table2]

    # side_effect makes consecutive .all() calls return queries, then tables.
    mock_session_query_filter_options = MagicMock()
    mock_session_query_filter.options.return_value = mock_session_query_filter_options
    mock_session_query_filter_options.all.side_effect = [queries, tables]

    expected = DashboardDetail(
        uri='foo_dashboard://gold.bar/dashboard_id',
        cluster='cluster_name',
        group_name='group_name',
        group_url='http://www.group_url.com',
        product='foo',
        name='dashboard name',
        url='http://www.foo.bar/dashboard_id',
        description='description',
        created_timestamp=123456789,
        last_successful_run_timestamp=9876543210,
        updated_timestamp=123456654321,
        last_run_timestamp=987654321,
        last_run_state='good_state',
        owners=[
            User(email='test_email',
                 first_name='test_first_name',
                 last_name='test_last_name',
                 full_name='test_full_name',
                 is_active=True,
                 github_username='******',
                 team_name='test_team',
                 slack_id='test_id',
                 employee_type='teamMember',
                 manager_fullname=''),
            User(email='test_email2',
                 first_name='test_first_name2',
                 last_name='test_last_name2',
                 full_name='test_full_name2',
                 is_active=True,
                 github_username='******',
                 team_name='test_team2',
                 slack_id='test_id2',
                 employee_type='teamMember',
                 manager_fullname=''),
        ],
        frequent_users=[],
        chart_names=['chart1', 'chart2'],
        query_names=['query1', 'query2'],
        queries=[
            DashboardQuery(name='query1'),
            DashboardQuery(name='query2',
                           url='http://foo.bar/query',
                           query_text='SELECT * FROM foo.bar'),
        ],
        tables=[
            PopularTable(database='db1',
                         name='table1',
                         description='table description 1',
                         cluster='cluster1',
                         schema='schema1'),
            PopularTable(database='db2',
                         name='table2',
                         cluster='cluster2',
                         schema='schema2'),
        ],
        tags=[
            Tag(tag_type='default', tag_name='tag_key1'),
            Tag(tag_type='default', tag_name='tag_key2'),
        ],
        badges=[Badge(badge_name='golden', category='table_status')],
        recent_view_count=100)

    proxy = MySQLProxy()
    actual = proxy.get_dashboard(id='dashboard_id')

    self.assertEqual(expected, actual)
def test_get_popular_resources(
        self,
        mock_rds_client: Any,
        mock_get_global_popular_resources_uris: Any) -> None:
    """
    Check that ``get_popular_resources`` returns table and dashboard
    summaries keyed by resource type name.

    The stubbed ``query().filter().options().options().all()`` call hands
    back the two tables on its first invocation and the dashboard on its
    second. The comparison is made on the ``repr`` of both mappings.
    """
    # Table fixture: two tables in one schema; only the first is described.
    rds_database = RDSDatabase(name='db')
    rds_cluster = RDSCluster(name='cluster')
    rds_schema = RDSSchema(name='schema')
    foo_table = RDSTable(name='foo')
    bar_table = RDSTable(name='bar')
    foo_description = RDSTableDescription(description='test description')
    rds_cluster.database = rds_database
    rds_schema.cluster = rds_cluster
    foo_table.schema = rds_schema
    bar_table.schema = rds_schema
    foo_table.description = foo_description

    # Dashboard fixture.
    rds_dashboard = RDSDashboard(
        rk='foo_dashboard://gold.bar/dashboard_id',
        name='dashboard name',
        dashboard_url='http://www.foo.bar/dashboard_id',
        created_timestamp=123456789)
    rds_group = RDSDashboardGroup(
        name='group_name',
        dashboard_group_url='http://www.group_url.com')
    rds_group.cluster = RDSDashboardCluster(name='cluster_name')
    rds_dashboard.group = rds_group
    rds_dashboard.description = RDSDashboardDescription(
        description='description')
    rds_dashboard.execution = [
        RDSDashboardExecution(rk='dashboard_last_successful_execution',
                              timestamp=9876543210),
        RDSDashboardExecution(rk='dashboard_last_execution',
                              timestamp=987654321,
                              state='good_state'),
    ]

    # Stub the query chain bottom-up; side_effect serves one list per call.
    double_options_stub = MagicMock()
    double_options_stub.all.side_effect = [
        [foo_table, bar_table],
        [rds_dashboard],
    ]
    options_stub = MagicMock()
    options_stub.options.return_value = double_options_stub
    filter_stub = MagicMock()
    filter_stub.options.return_value = options_stub
    query_stub = MagicMock()
    query_stub.filter.return_value = filter_stub
    session_stub = MagicMock()
    session_stub.query.return_value = query_stub

    # The object returned by create_session() hands out the stubbed
    # session from its __enter__.
    session_context = MagicMock()
    session_context.__enter__.return_value = session_stub
    client_stub = MagicMock()
    client_stub.create_session.return_value = session_context
    mock_rds_client.return_value = client_stub

    expected_tables = [
        TableSummary(database='db',
                     cluster='cluster',
                     schema='schema',
                     name='foo',
                     description='test description'),
        TableSummary(database='db',
                     cluster='cluster',
                     schema='schema',
                     name='bar'),
    ]
    expected_dashboards = [
        DashboardSummary(uri='foo_dashboard://gold.bar/dashboard_id',
                         cluster='cluster_name',
                         group_name='group_name',
                         group_url='http://www.group_url.com',
                         product='foo',
                         name='dashboard name',
                         url='http://www.foo.bar/dashboard_id',
                         description='description',
                         last_successful_run_timestamp=9876543210)
    ]
    expected = {
        ResourceType.Table.name: expected_tables,
        ResourceType.Dashboard.name: expected_dashboards,
    }

    actual = MySQLProxy().get_popular_resources(
        num_entries=2,
        resource_types=['table', 'dashboard'])

    self.assertEqual(repr(expected), repr(actual))
def test_get_table(self, mock_rds_client: Any) -> None:
    """
    Check that ``get_table`` builds the expected ``Table`` from the
    stubbed table, column and reader records.

    The stubbed session serves the table from ``query().filter().first()``,
    the readers from ``query().filter().order_by().limit().all()`` and the
    columns from ``query().filter().options().all()``. The comparison is
    made on the ``str`` form of both objects.
    """
    # Table and its schema -> cluster -> database chain.
    rds_database = RDSDatabase(name='hive')
    rds_cluster = RDSCluster(name='gold')
    rds_schema = RDSSchema(name='foo_schema')
    rds_schema.cluster = rds_cluster
    rds_cluster.database = rds_database

    rds_table = RDSTable(name='foo_table')
    rds_table.schema = rds_schema
    rds_table.description = RDSTableDescription(
        description='foo description')

    # Columns: the second one additionally carries a badge.
    first_column = RDSColumn(name='bar_id_1', type='varchar', sort_order=0)
    first_column.description = RDSColumnDescription(
        description='bar col description')
    first_column.stats = [
        RDSColumnStat(stat_type='avg',
                      start_epoch='1',
                      end_epoch='1',
                      stat_val='1')
    ]

    second_column = RDSColumn(name='bar_id_2', type='bigint', sort_order=1)
    second_column.description = RDSColumnDescription(
        description='bar col2 description')
    second_column.stats = [
        RDSColumnStat(stat_type='avg',
                      start_epoch='2',
                      end_epoch='2',
                      stat_val='2')
    ]
    second_column.badges = [RDSBadge(rk='primary key', category='column')]
    rds_columns = [first_column, second_column]

    # Remaining table attributes.
    rds_table.watermarks = [
        RDSTableWatermark(
            rk='hive://gold.test_schema/test_table/high_watermark/',
            partition_key='ds',
            partition_value='fake_value',
            create_time='fake_time'),
        RDSTableWatermark(
            rk='hive://gold.test_schema/test_table/low_watermark/',
            partition_key='ds',
            partition_value='fake_value',
            create_time='fake_time'),
    ]
    rds_table.application = RDSApplication(
        application_url='airflow_host/admin/airflow/tree?dag_id=test_table',
        description='DAG generating a table',
        name='Airflow',
        id='dag/task_id')
    rds_table.timestamp = RDSTableTimestamp(last_updated_timestamp=1)
    rds_table.owners = [
        RDSUser(rk='*****@*****.**', email='*****@*****.**')
    ]
    rds_table.tags = [RDSTag(rk='test', tag_type='default')]
    rds_table.badges = [RDSBadge(rk='golden', category='table_status')]
    rds_table.source = RDSTableSource(rk='some key',
                                      source_type='github',
                                      source='/source_file_loc')
    rds_table.programmatic_descriptions = [
        RDSTableProgrammaticDescription(description_source='s3_crawler',
                                        description='Test Test Test'),
        RDSTableProgrammaticDescription(description_source='quality_report',
                                        description='Test Test'),
    ]
    rds_readers = [RDSTableUsage(user_rk='*****@*****.**', read_count=5)]

    # Stub the three query shapes hanging off query().filter(), bottom-up.
    limit_stub = MagicMock()
    limit_stub.all.return_value = rds_readers
    order_by_stub = MagicMock()
    order_by_stub.limit.return_value = limit_stub

    options_stub = MagicMock()
    options_stub.all.return_value = rds_columns

    filter_stub = MagicMock()
    filter_stub.first.return_value = rds_table
    filter_stub.order_by.return_value = order_by_stub
    filter_stub.options.return_value = options_stub

    query_stub = MagicMock()
    query_stub.filter.return_value = filter_stub
    session_stub = MagicMock()
    session_stub.query.return_value = query_stub

    # The object returned by create_session() hands out the stubbed
    # session from its __enter__.
    session_context = MagicMock()
    session_context.__enter__.return_value = session_stub
    client_stub = MagicMock()
    client_stub.create_session.return_value = session_context
    mock_rds_client.return_value = client_stub

    actual_table = MySQLProxy().get_table(table_uri='dummy_uri')

    expected_columns = [
        Column(name='bar_id_1',
               description='bar col description',
               col_type='varchar',
               sort_order=0,
               stats=[
                   Stat(start_epoch=1,
                        end_epoch=1,
                        stat_type='avg',
                        stat_val='1')
               ],
               badges=[]),
        Column(name='bar_id_2',
               description='bar col2 description',
               col_type='bigint',
               sort_order=1,
               stats=[
                   Stat(start_epoch=2,
                        end_epoch=2,
                        stat_type='avg',
                        stat_val='2')
               ],
               badges=[Badge(badge_name='primary key', category='column')]),
    ]
    expected_watermarks = [
        Watermark(watermark_type='high_watermark',
                  partition_key='ds',
                  partition_value='fake_value',
                  create_time='fake_time'),
        Watermark(watermark_type='low_watermark',
                  partition_key='ds',
                  partition_value='fake_value',
                  create_time='fake_time'),
    ]
    expected = Table(
        database='hive',
        cluster='gold',
        schema='foo_schema',
        name='foo_table',
        tags=[Tag(tag_name='test', tag_type='default')],
        badges=[Badge(badge_name='golden', category='table_status')],
        table_readers=[
            Reader(user=User(email='*****@*****.**'), read_count=5)
        ],
        description='foo description',
        watermarks=expected_watermarks,
        columns=expected_columns,
        owners=[User(email='*****@*****.**')],
        table_writer=Application(
            application_url='airflow_host/admin/airflow/tree?dag_id=test_table',
            description='DAG generating a table',
            name='Airflow',
            id='dag/task_id'),
        last_updated_timestamp=1,
        source=Source(source='/source_file_loc', source_type='github'),
        is_view=False,
        # Listed in a different order than the fixture above
        # (quality_report before s3_crawler).
        programmatic_descriptions=[
            ProgrammaticDescription(source='quality_report',
                                    text='Test Test'),
            ProgrammaticDescription(source='s3_crawler',
                                    text='Test Test Test'),
        ])

    self.assertEqual(str(expected), str(actual_table))
def _create_record_iterator(self) -> Iterator[RDSModel]:
    """
    Lazily yield the RDS records for this table.

    Records are produced in this order: database, cluster and schema
    (each only if its key is not yet in
    ``TableMetadata.serialized_records_keys``), the table itself, its
    description (if any), one tag record plus one table-tag link per tag,
    and then for every column the column record, its description (if
    any), and each badge record followed by its column-badge link.

    :return: an iterator over the RDS model instances
    """
    # Database, Cluster, Schema: emitted at most once per record key; the
    # class-level key set is updated as a side effect.
    parent_records: List[RDSModel] = [
        RDSDatabase(rk=self._get_database_key(), name=self.database),
        RDSCluster(rk=self._get_cluster_key(),
                   name=self.cluster,
                   database_rk=self._get_database_key()),
        RDSSchema(rk=self._get_schema_key(),
                  name=self.schema,
                  cluster_rk=self._get_cluster_key()),
    ]
    for parent in parent_records:
        if parent.rk in TableMetadata.serialized_records_keys:
            continue
        TableMetadata.serialized_records_keys.add(parent.rk)
        yield parent

    # Table
    yield RDSTable(rk=self._get_table_key(),
                   name=self.name,
                   is_view=self.is_view,
                   schema_rk=self._get_schema_key())

    # Table description: the description's label decides which model
    # class stores it.
    if self.description:
        table_description_key = self._get_table_description_key(
            self.description)
        has_description_label = (
            self.description.label == DescriptionMetadata.DESCRIPTION_NODE_LABEL)
        description_model = (RDSTableDescription
                             if has_description_label
                             else RDSTableProgrammaticDescription)
        yield description_model(rk=table_description_key,
                                description_source=self.description.source,
                                description=self.description.text,
                                table_rk=self._get_table_key())

    # Tag: the tag record first, then the record linking it to the table.
    for tag in self.tags:
        yield TagMetadata(tag).get_record()
        yield RDSTableTag(table_rk=self._get_table_key(),
                          tag_rk=TagMetadata.get_tag_key(tag))

    # Column
    for column in self.columns:
        yield RDSTableColumn(rk=self._get_col_key(column),
                             name=column.name,
                             type=column.type,
                             sort_order=column.sort_order,
                             table_rk=self._get_table_key())

        if column.description:
            yield RDSColumnDescription(
                rk=self._get_col_description_key(column, column.description),
                description_source=column.description.source,
                description=column.description.text,
                column_rk=self._get_col_key(column))

        if column.badges:
            column_badge_metadata = BadgeMetadata(
                start_label=ColumnMetadata.COLUMN_NODE_LABEL,
                start_key=self._get_col_key(column),
                badges=column.badges)
            # Each badge record is followed by the record linking it to
            # the column.
            for badge_record in column_badge_metadata.get_badge_records():
                yield badge_record
                yield RDSColumnBadge(column_rk=self._get_col_key(column),
                                     badge_rk=badge_record.rk)