def test_not_including_empty_attribute(self) -> None:
    """Compare serialized User nodes with and without ``do_not_update_empty_attribute``.

    A user built with only ``email`` and ``foo`` is expected to serialize
    with every empty-string attribute present.  A user built with
    ``do_not_update_empty_attribute=True`` is expected to serialize to just
    ``KEY``, ``LABEL``, ``email`` and ``foo``.
    """
    default_user = User(email='*****@*****.**', foo='bar')
    default_serialized = neo4_serializer.serialize_node(default_user.create_next_node())
    expected_default = {
        'KEY': '*****@*****.**',
        'LABEL': 'User',
        'email': '*****@*****.**',
        'is_active:UNQUOTED': True,
        'profile_url': '',
        'first_name': '',
        'last_name': '',
        'full_name': '',
        'github_username': '',
        'team_name': '',
        'employee_type': '',
        'slack_id': '',
        'role_name': '',
        'updated_at:UNQUOTED': 0,
        'foo': 'bar'
    }
    self.assertDictEqual(default_serialized, expected_default)

    sparse_user = User(email='*****@*****.**',
                       foo='bar',
                       is_active=False,
                       do_not_update_empty_attribute=True)
    sparse_serialized = neo4_serializer.serialize_node(sparse_user.create_next_node())
    expected_sparse = {
        'KEY': '*****@*****.**',
        'LABEL': 'User',
        'email': '*****@*****.**',
        'foo': 'bar'
    }
    self.assertDictEqual(sparse_serialized, expected_sparse)
def test_serialize(self) -> None:
    """Serialize nodes and relations of three metric-metadata fixtures.

    For each fixture every node is drained and serialized, compared with
    the expected node list, and then the same is done for relations.
    """
    cases = [
        # First test
        (self.metric_metadata, self.expected_nodes, self.expected_rels),
        # Second test
        (self.metric_metadata2, self.expected_nodes_deduped2, self.expected_rels_deduped2),
        # Third test
        (self.metric_metadata3, self.expected_nodes_deduped3, self.expected_rels_deduped3),
    ]

    for metadata, nodes_wanted, rels_wanted in cases:
        serialized_nodes = []
        while True:
            node = metadata.next_node()
            if not node:
                break
            serialized_nodes.append(neo4_serializer.serialize_node(node))
        self.assertEqual(nodes_wanted, serialized_nodes)

        serialized_rels = []
        while True:
            relation = metadata.next_relation()
            if not relation:
                break
            serialized_rels.append(neo4_serializer.serialize_relationship(relation))
        self.assertEqual(rels_wanted, serialized_rels)
def test_create_nodes_programmatic_description(self) -> None:
    """Check the two nodes produced by a SchemaModel with a custom description source.

    The first node is expected to be the ``Schema`` node and the second a
    ``Programmatic_Description`` node; a third call must return ``None``.
    """
    schema = SchemaModel(schema_key='db://cluster.schema',
                         schema='schema_name',
                         description='foo',
                         description_source='bar')

    first_node = schema.create_next_node()
    first_serialized = neo4_serializer.serialize_node(first_node)
    second_node = schema.create_next_node()
    second_serialized = neo4_serializer.serialize_node(second_node)

    expected_schema = {
        'name': 'schema_name',
        'KEY': 'db://cluster.schema',
        'LABEL': 'Schema'
    }
    expected_description = {
        'description_source': 'bar',
        'description': 'foo',
        'KEY': 'db://cluster.schema/_bar_description',
        'LABEL': 'Programmatic_Description'
    }
    self.assertDictEqual(first_serialized, expected_schema)
    self.assertDictEqual(second_serialized, expected_description)

    # Nothing is left after the schema and description nodes.
    self.assertIsNone(schema.create_next_node())
def test_create_nodes(self) -> None:
    """Check the schema node and description node produced by ``self.schema``.

    Two nodes are pulled and serialized; a third call must return ``None``.
    """
    first_node = self.schema.create_next_node()
    first_serialized = neo4_serializer.serialize_node(first_node)
    second_node = self.schema.create_next_node()
    second_serialized = neo4_serializer.serialize_node(second_node)

    expected_schema = {
        'name': 'schema_name',
        'KEY': 'db://cluster.schema',
        'LABEL': 'Schema'
    }
    expected_description = {
        'description_source': 'description',
        'description': 'foo',
        'KEY': 'db://cluster.schema/_description',
        'LABEL': 'Description'
    }
    self.assertDictEqual(first_serialized, expected_schema)
    self.assertDictEqual(second_serialized, expected_description)

    # Nothing is left after the two nodes above.
    self.assertIsNone(self.schema.create_next_node())
def test_serialize(self) -> None:
    """Serialize all nodes and relations of ``self.table_col_usage``.

    Expects a single minimal ``User`` node and a single ``READ_BY``
    relation from the table to that user.
    """
    serialized_nodes = []
    while True:
        node = self.table_col_usage.next_node()
        if not node:
            break
        serialized_nodes.append(neo4_serializer.serialize_node(node))

    nodes_wanted = [{
        'LABEL': 'User',
        'KEY': '*****@*****.**',
        'email': '*****@*****.**'
    }]
    self.assertEqual(nodes_wanted, serialized_nodes)

    serialized_rels = []
    while True:
        relation = self.table_col_usage.next_relation()
        if not relation:
            break
        serialized_rels.append(neo4_serializer.serialize_relationship(relation))

    rels_wanted = [{
        'read_count:UNQUOTED': 1,
        'END_KEY': '*****@*****.**',
        'START_LABEL': 'Table',
        'END_LABEL': 'User',
        'START_KEY': 'db://gold.scm/foo',
        'TYPE': 'READ_BY',
        'REVERSE_TYPE': 'READ'
    }]
    self.assertEqual(rels_wanted, serialized_rels)
def test_tags_populated_from_str(self) -> None:
    """Check that a comma-separated ``tags`` string yields tag nodes and relations.

    The table is built with ``tags="tag3, tag4"``; the serialized nodes and
    relations at indexes 2 and 3 are expected to describe ``tag3`` and
    ``tag4`` respectively.
    """
    self.table_metadata5 = TableMetadata(
        'hive', 'gold', 'test_schema5', 'test_table5', 'test_table5',
        [ColumnMetadata('test_id1', 'description of test_table1', 'bigint', 0)],
        tags="tag3, tag4")

    # Test table tag field populated from str
    serialized_nodes = []
    while True:
        node = self.table_metadata5.next_node()
        if not node:
            break
        serialized_nodes.append(neo4_serializer.serialize_node(node))

    self.assertEqual(serialized_nodes[2].get('LABEL'), 'Tag')
    self.assertEqual(serialized_nodes[2].get('KEY'), 'tag3')
    self.assertEqual(serialized_nodes[3].get('KEY'), 'tag4')

    serialized_rels = []
    while True:
        relation = self.table_metadata5.next_relation()
        if not relation:
            break
        serialized_rels.append(neo4_serializer.serialize_relationship(relation))

    # Table tag relationship: both expected relations differ only in END_KEY.
    shared_fields = {
        'START_LABEL': 'Table',
        'END_LABEL': 'Tag',
        'START_KEY': 'hive://gold.test_schema5/test_table5',
        'TYPE': 'TAGGED_BY',
        'REVERSE_TYPE': 'TAG'
    }
    expected_tab_tag_rel3 = {**shared_fields, 'END_KEY': 'tag3'}
    expected_tab_tag_rel4 = {**shared_fields, 'END_KEY': 'tag4'}

    self.assertEqual(serialized_rels[2], expected_tab_tag_rel3)
    self.assertEqual(serialized_rels[3], expected_tab_tag_rel4)
def test_table_attributes(self) -> None:
    """Check that extra keyword attributes appear on the first serialized node.

    The table is built with ``attr1='uri'`` and ``attr2='attr2'``; both are
    expected on the serialized node at index 0.
    """
    columns = [
        ColumnMetadata('test_id1', 'description of test_table1', 'bigint', 0),
        ColumnMetadata('test_id2', 'description of test_id2', 'bigint', 1),
        ColumnMetadata('is_active', None, 'boolean', 2),
        ColumnMetadata('source', 'description of source', 'varchar', 3),
        ColumnMetadata('etl_created_at', 'description of etl_created_at', 'timestamp', 4),
        ColumnMetadata('ds', None, 'varchar', 5)
    ]
    self.table_metadata3 = TableMetadata('hive', 'gold', 'test_schema3', 'test_table3', 'test_table3',
                                         columns,
                                         is_view=False,
                                         attr1='uri',
                                         attr2='attr2')

    serialized_nodes = []
    while True:
        node = self.table_metadata3.next_node()
        if not node:
            break
        serialized_nodes.append(neo4_serializer.serialize_node(node))

    first_node = serialized_nodes[0]
    self.assertEqual(first_node.get('attr1'), 'uri')
    self.assertEqual(first_node.get('attr2'), 'attr2')
def test_dashboard_usage_user_nodes(self) -> None:
    """Check the User node emitted when ``should_create_user_node=True``.

    Exactly one node is expected: the first call to ``create_next_node``
    must produce the user node and the second must return ``None``.
    """
    dashboard_usage = DashboardUsage(dashboard_group_id='dashboard_group_id',
                                     dashboard_id='dashboard_id',
                                     email='*****@*****.**',
                                     view_count=123,
                                     cluster='cluster_id',
                                     product='product_id',
                                     should_create_user_node=True)

    actual = dashboard_usage.create_next_node()
    # Guard before use: a missing node should fail here, on the assertion,
    # not later inside the serializer.
    assert actual is not None
    actual_serialized = neo4_serializer.serialize_node(actual)

    expected: Dict[str, Any] = {
        'is_active:UNQUOTED': True,
        'last_name': '',
        'full_name': '',
        'employee_type': '',
        'first_name': '',
        'updated_at:UNQUOTED': 0,
        'LABEL': 'User',
        'slack_id': '',
        'KEY': '*****@*****.**',
        'github_username': '',
        'team_name': '',
        'email': '*****@*****.**',
        'role_name': ''
    }
    self.assertDictEqual(expected, actual_serialized)

    # No further nodes after the user node.
    self.assertIsNone(dashboard_usage.create_next_node())
def test_create_nodes_with_owners_list(self) -> None:
    """Check that a list of owners produces one User node per owner.

    The owners are passed as a list (the second one padded with spaces);
    two nodes are expected, matching ``owner1`` and ``owner2`` in any order.
    """
    self.table_owner_list = TableOwner(db_name='hive',
                                       schema=SCHEMA,
                                       table_name=TABLE,
                                       cluster=CLUSTER,
                                       owners=['user1@1', ' user2@2 '])
    nodes = self.table_owner_list.create_nodes()
    self.assertEqual(len(nodes), 2)

    expected_node1 = {
        NODE_KEY: User.USER_NODE_KEY_FORMAT.format(email=owner1),
        NODE_LABEL: User.USER_NODE_LABEL,
        User.USER_NODE_EMAIL: owner1
    }
    expected_node2 = {
        NODE_KEY: User.USER_NODE_KEY_FORMAT.format(email=owner2),
        NODE_LABEL: User.USER_NODE_LABEL,
        User.USER_NODE_EMAIL: owner2
    }
    actual_nodes = [neo4_serializer.serialize_node(node) for node in nodes]

    # assertIn reports both the missing item and the container on failure,
    # whereas assertTrue(x in y) only reports "False is not true".
    self.assertIn(expected_node1, actual_nodes)
    self.assertIn(expected_node2, actual_nodes)
def test_z_custom_sources(self) -> None:
    """Check the description node of a table built with ``description_source="custom"``.

    The serialized node at index 1 is expected to be a
    ``Programmatic_Description`` node keyed by the custom source.
    """
    columns = [
        ColumnMetadata('test_id1', 'description of test_table1', 'bigint', 0),
        ColumnMetadata('test_id2', 'description of test_id2', 'bigint', 1),
        ColumnMetadata('is_active', None, 'boolean', 2),
        ColumnMetadata('source', 'description of source', 'varchar', 3),
        ColumnMetadata('etl_created_at', 'description of etl_created_at', 'timestamp', 4),
        ColumnMetadata('ds', None, 'varchar', 5)
    ]
    self.custom_source = TableMetadata('hive', 'gold', 'test_schema3', 'test_table4', 'test_table4',
                                       columns,
                                       is_view=False,
                                       description_source="custom")

    serialized_nodes = []
    while True:
        node = self.custom_source.next_node()
        if not node:
            break
        serialized_nodes.append(neo4_serializer.serialize_node(node))

    expected = {
        'LABEL': 'Programmatic_Description',
        'KEY': 'hive://gold.test_schema3/test_table4/_custom_description',
        'description_source': 'custom',
        'description': 'test_table4'
    }
    self.assertEqual(serialized_nodes[1], expected)
def test_col_badge_field(self) -> None:
    """Check that column badges yield badge nodes and ``HAS_BADGE`` relations.

    The single column carries ``col-badge1`` and ``col-badge2``; the
    serialized nodes and relations at indexes 4 and 5 are expected to
    describe those two badges in that order.
    """
    badged_column = ColumnMetadata('test_id1', 'description of test_table1', 'bigint', 0,
                                   ['col-badge1', 'col-badge2'])
    self.table_metadata4 = TableMetadata('hive', 'gold', 'test_schema4', 'test_table4', 'test_table4',
                                         [badged_column],
                                         is_view=False,
                                         attr1='uri',
                                         attr2='attr2')

    serialized_nodes = []
    while True:
        node = self.table_metadata4.next_node()
        if not node:
            break
        serialized_nodes.append(neo4_serializer.serialize_node(node))

    self.assertEqual(serialized_nodes[4].get('KEY'), 'col-badge1')
    self.assertEqual(serialized_nodes[5].get('KEY'), 'col-badge2')

    serialized_rels = []
    while True:
        relation = self.table_metadata4.next_relation()
        if not relation:
            break
        serialized_rels.append(neo4_serializer.serialize_relationship(relation))

    # Both expected relations differ only in END_KEY.
    shared_fields = {
        'START_LABEL': 'Column',
        'END_LABEL': 'Badge',
        'START_KEY': 'hive://gold.test_schema4/test_table4/test_id1',
        'TYPE': 'HAS_BADGE',
        'REVERSE_TYPE': 'BADGE_FOR'
    }
    expected_col_badge_rel1 = {**shared_fields, 'END_KEY': 'col-badge1'}
    expected_col_badge_rel2 = {**shared_fields, 'END_KEY': 'col-badge2'}

    self.assertEqual(serialized_rels[4], expected_col_badge_rel1)
    self.assertEqual(serialized_rels[5], expected_col_badge_rel2)
def test_serialize(self) -> None:
    """Serialize all nodes and relations of ``self.table_col_usage``.

    Expects a single ``User`` node carrying the empty-string profile
    attributes and a single ``READ_BY`` relation from the table to that user.
    """
    serialized_nodes = []
    while True:
        node = self.table_col_usage.next_node()
        if not node:
            break
        serialized_nodes.append(neo4_serializer.serialize_node(node))

    nodes_wanted = [{
        'first_name': '',
        'last_name': '',
        'full_name': '',
        'employee_type': '',
        'is_active:UNQUOTED': True,
        'updated_at:UNQUOTED': 0,
        'LABEL': 'User',
        'slack_id': '',
        'KEY': '*****@*****.**',
        'github_username': '',
        'team_name': '',
        'email': '*****@*****.**',
        'role_name': ''
    }]
    self.assertEqual(nodes_wanted, serialized_nodes)

    serialized_rels = []
    while True:
        relation = self.table_col_usage.next_relation()
        if not relation:
            break
        serialized_rels.append(neo4_serializer.serialize_relationship(relation))

    rels_wanted = [{
        'read_count:UNQUOTED': 1,
        'END_KEY': '*****@*****.**',
        'START_LABEL': 'Table',
        'END_LABEL': 'User',
        'START_KEY': 'db://gold.scm/foo',
        'TYPE': 'READ_BY',
        'REVERSE_TYPE': 'READ'
    }]
    self.assertEqual(rels_wanted, serialized_rels)
def test_create_nodes(self) -> None:
    """Check the single node created by ``self.watermark`` and its serialized form.

    Uses ``assertEqual`` throughout: the ``assertEquals`` alias is
    deprecated and was removed from ``unittest`` in Python 3.12.
    """
    nodes = self.watermark.create_nodes()
    self.assertEqual(len(nodes), 1)
    self.assertEqual(nodes[0], self.expected_node_result)
    self.assertEqual(neo4_serializer.serialize_node(nodes[0]),
                     self.expected_serialized_node_result)
def test_create_nodes(self) -> None:
    """Serialize every node from ``self.user`` and compare with one fully populated User."""
    serialized_nodes = []
    while True:
        node = self.user.create_next_node()
        if not node:
            break
        serialized_nodes.append(neo4_serializer.serialize_node(node))

    expected_nodes = [{
        'LABEL': 'User',
        'KEY': '*****@*****.**',
        'email': '*****@*****.**',
        'is_active:UNQUOTED': True,
        'profile_url': 'https://profile',
        'first_name': 'test_first',
        'last_name': 'test_last',
        'full_name': 'test_first test_last',
        'github_username': '******',
        'team_name': 'test_team',
        'employee_type': 'FTE',
        'slack_id': 'slack',
        'role_name': 'swe',
        'updated_at:UNQUOTED': 1
    }]
    self.assertEqual(serialized_nodes, expected_nodes)
def test_create_nodes_with_owners_list(self) -> None:
    """Check that a list of owners produces User nodes for ``owner1`` then ``owner2``.

    Nodes are pulled one at a time via ``create_next_node`` and the
    serialized list is compared, in order, with the expected nodes.
    """
    self.table_owner_list = TableOwner(db_name='hive',
                                       schema=SCHEMA,
                                       table_name=TABLE,
                                       cluster=CLUSTER,
                                       owners=['user1@1', ' user2@2 '])

    expected = [
        {
            NODE_KEY: User.USER_NODE_KEY_FORMAT.format(email=owner1),
            NODE_LABEL: User.USER_NODE_LABEL,
            User.USER_NODE_EMAIL: owner1
        },
        {
            NODE_KEY: User.USER_NODE_KEY_FORMAT.format(email=owner2),
            NODE_LABEL: User.USER_NODE_LABEL,
            User.USER_NODE_EMAIL: owner2
        },
    ]

    serialized_nodes = []
    while True:
        node = self.table_owner_list.create_next_node()
        if not node:
            break
        serialized_nodes.append(neo4_serializer.serialize_node(node))

    self.assertEqual(serialized_nodes, expected)
def test_usage_nodes(self) -> None:
    """Serialize every node from ``self.usage`` and compare with ``self.expected_nodes``."""
    serialized_nodes = []
    while True:
        usage_node = self.usage.next_node()
        if not usage_node:
            break
        serialized_nodes.append(neo4_serializer.serialize_node(usage_node))

    self.assertEqual(serialized_nodes, self.expected_nodes)
def test_create_nodes(self) -> None:
    """Assert that ``self.table_lineage`` yields no nodes at all."""
    serialized_nodes = []
    while True:
        lineage_node = self.table_lineage.create_next_node()
        if not lineage_node:
            break
        serialized_nodes.append(neo4_serializer.serialize_node(lineage_node))

    self.assertEqual(len(serialized_nodes), 0)
def test_tags_field(self) -> None:
    """Check that a ``tags`` list yields tag nodes and ``TAGGED_BY`` relations.

    The table is built with ``tags=['tag1', 'tag2']`` plus two extra
    attributes.  The extra attributes are expected on node 0, and the
    nodes and relations at indexes 2 and 3 are expected to describe
    ``tag1`` and ``tag2``.
    """
    self.table_metadata4 = TableMetadata(
        'hive', 'gold', 'test_schema4', 'test_table4', 'test_table4',
        [ColumnMetadata('test_id1', 'description of test_table1', 'bigint', 0)],
        is_view=False,
        tags=['tag1', 'tag2'],
        attr1='uri',
        attr2='attr2')

    serialized_nodes = []
    while True:
        node = self.table_metadata4.next_node()
        if not node:
            break
        serialized_nodes.append(neo4_serializer.serialize_node(node))

    self.assertEqual(serialized_nodes[0].get('attr1'), 'uri')
    self.assertEqual(serialized_nodes[0].get('attr2'), 'attr2')

    self.assertEqual(serialized_nodes[2].get('LABEL'), 'Tag')
    self.assertEqual(serialized_nodes[2].get('KEY'), 'tag1')
    self.assertEqual(serialized_nodes[3].get('KEY'), 'tag2')

    serialized_rels = []
    while True:
        relation = self.table_metadata4.next_relation()
        if not relation:
            break
        serialized_rels.append(neo4_serializer.serialize_relationship(relation))

    # Table tag relationship: both expected relations differ only in END_KEY.
    shared_fields = {
        'START_LABEL': 'Table',
        'END_LABEL': 'Tag',
        'START_KEY': 'hive://gold.test_schema4/test_table4',
        'TYPE': 'TAGGED_BY',
        'REVERSE_TYPE': 'TAG'
    }
    expected_tab_tag_rel1 = {**shared_fields, 'END_KEY': 'tag1'}
    expected_tab_tag_rel2 = {**shared_fields, 'END_KEY': 'tag2'}

    self.assertEqual(serialized_rels[2], expected_tab_tag_rel1)
    self.assertEqual(serialized_rels[3], expected_tab_tag_rel2)
def test_create_nodes(self) -> None:
    """Serialize every node from ``self.es_last_updated`` and compare with the expected results."""
    serialized_nodes = []
    while True:
        next_node = self.es_last_updated.create_next_node()
        if not next_node:
            break
        serialized_nodes.append(neo4_serializer.serialize_node(next_node))

    self.assertEqual(serialized_nodes, self.expected_node_results)
def test_create_nodes(self) -> None:
    """Check the Chart node for a fully specified chart and for one without name/type.

    The first chart sets ``chart_name`` and ``chart_type`` and must emit
    exactly one node containing them.  The second omits both, and its
    serialized node is expected to lack the ``name`` and ``type`` keys.
    """
    dashboard_chart = DashboardChart(dashboard_group_id='dg_id',
                                     dashboard_id='d_id',
                                     query_id='q_id',
                                     chart_id='c_id',
                                     chart_name='c_name',
                                     chart_type='bar',
                                     chart_url='http://gold.foo/chart')

    actual = dashboard_chart.create_next_node()
    # Guard before use: a missing node should fail here, on the assertion,
    # not later inside the serializer.
    assert actual is not None
    actual_serialized = neo4_serializer.serialize_node(actual)
    expected: Dict[str, Any] = {
        'name': 'c_name',
        'type': 'bar',
        'id': 'c_id',
        'url': 'http://gold.foo/chart',
        'KEY': '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
        'LABEL': 'Chart'
    }
    self.assertDictEqual(expected, actual_serialized)
    self.assertIsNone(dashboard_chart.create_next_node())

    dashboard_chart = DashboardChart(dashboard_group_id='dg_id',
                                     dashboard_id='d_id',
                                     query_id='q_id',
                                     chart_id='c_id',
                                     chart_url='http://gold.foo.bar/')

    actual2 = dashboard_chart.create_next_node()
    # Same guard as above, again placed before the node is used.
    assert actual2 is not None
    actual2_serialized = neo4_serializer.serialize_node(actual2)
    expected2: Dict[str, Any] = {
        'id': 'c_id',
        'KEY': '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
        'LABEL': 'Chart',
        'url': 'http://gold.foo.bar/'
    }
    self.assertDictEqual(expected2, actual2_serialized)
def test_create_nodes(self) -> None:
    """For each case in ``self.test_cases``, serialize its application's nodes and compare."""
    for case in self.test_cases:
        serialized_nodes = []
        while True:
            app_node = case.application.create_next_node()
            if not app_node:
                break
            serialized_nodes.append(neo4_serializer.serialize_node(app_node))

        self.assertEqual(serialized_nodes, case.expected_node_results)
def test_tags_arent_populated_from_empty_list_and_str(self) -> None:
    """Check that neither ``tags=[]`` nor ``tags=""`` produces a node labelled ``Tag``."""
    self.table_metadata6 = TableMetadata(
        'hive', 'gold', 'test_schema6', 'test_table6', 'test_table6',
        [ColumnMetadata('test_id1', 'description of test_table1', 'bigint', 0)],
        tags=[])
    self.table_metadata7 = TableMetadata(
        'hive', 'gold', 'test_schema7', 'test_table7', 'test_table7',
        [ColumnMetadata('test_id1', 'description of test_table1', 'bigint', 0)],
        tags="")

    # First the table built from an empty List, then the one built from an
    # empty str: no serialized node of either may carry the Tag label.
    for untagged_table in (self.table_metadata6, self.table_metadata7):
        while True:
            node = untagged_table.next_node()
            if not node:
                break
            serialized = neo4_serializer.serialize_node(node)
            self.assertNotEqual(serialized.get('LABEL'), 'Tag')
def test_create_nodes_no_description(self) -> None:
    """Check that a SchemaModel without a description emits only the Schema node."""
    schema = SchemaModel(schema_key='db://cluster.schema', schema='schema_name')

    only_node = schema.create_next_node()
    only_serialized = neo4_serializer.serialize_node(only_node)

    expected_schema = {
        'name': 'schema_name',
        'KEY': 'db://cluster.schema',
        'LABEL': 'Schema'
    }
    self.assertDictEqual(only_serialized, expected_schema)

    # No description was given, so there is no second node.
    self.assertIsNone(schema.create_next_node())
def test_serialize(self) -> None:
    """Serialize nodes and relations of a TableColumnUsage built from one ColumnReader.

    Expects a single ``User`` node carrying the empty-string profile
    attributes and a single ``READ_BY`` relation from the table to that user.
    """
    reader = ColumnReader(database='db',
                          cluster='gold',
                          schema='scm',
                          table='foo',
                          column='*',
                          user_email='*****@*****.**')
    table_col_usage = TableColumnUsage(col_readers=[reader])

    serialized_nodes = []
    while True:
        node = table_col_usage.next_node()
        if not node:
            break
        serialized_nodes.append(neo4_serializer.serialize_node(node))

    nodes_wanted = [{
        'first_name': '',
        'last_name': '',
        'full_name': '',
        'employee_type': '',
        'is_active:UNQUOTED': True,
        'updated_at:UNQUOTED': 0,
        'LABEL': 'User',
        'slack_id': '',
        'KEY': '*****@*****.**',
        'github_username': '',
        'team_name': '',
        'email': '*****@*****.**',
        'role_name': ''
    }]
    self.assertEqual(nodes_wanted, serialized_nodes)

    serialized_rels = []
    while True:
        relation = table_col_usage.next_relation()
        if not relation:
            break
        serialized_rels.append(neo4_serializer.serialize_relationship(relation))

    rels_wanted = [{
        'read_count:UNQUOTED': 1,
        'END_KEY': '*****@*****.**',
        'START_LABEL': 'Table',
        'END_LABEL': 'User',
        'START_KEY': 'db://gold.scm/foo',
        'TYPE': 'READ_BY',
        'REVERSE_TYPE': 'READ'
    }]
    self.assertEqual(rels_wanted, serialized_rels)
def test_create_nodes(self) -> None:
    """Check the serialized form of the first node from ``self.dashboard_query``."""
    query_node = self.dashboard_query.create_next_node()
    query_serialized = neo4_serializer.serialize_node(query_node)

    expected = {
        'url': 'http://foo.bar/query/baz',
        'name': 'q_name',
        'id': 'q_id',
        'query_text': 'SELECT * FROM foo.bar',
        NODE_KEY: '_dashboard://gold.dg_id/d_id/query/q_id',
        NODE_LABEL: DashboardQuery.DASHBOARD_QUERY_LABEL
    }
    self.assertEqual(expected, query_serialized)
def test_create_nodes(self) -> None:
    """Serialize every node from ``self.query_metadata`` and expect one ``Query`` node."""
    serialized_nodes = []
    while True:
        query_node = self.query_metadata.create_next_node()
        if not query_node:
            break
        serialized_nodes.append(neo4_serializer.serialize_node(query_node))

    expected_nodes = [{
        'LABEL': 'Query',
        'KEY': self._query_hash,
        'sql': self.sql
    }]
    self.assertEqual(serialized_nodes, expected_nodes)
def test_create_nodes(self) -> None:
    """Serialize every node from ``self.table_source`` and expect one ``Source`` node."""
    serialized_nodes = []
    while True:
        source_node = self.table_source.create_next_node()
        if not source_node:
            break
        serialized_nodes.append(neo4_serializer.serialize_node(source_node))

    expected_nodes = [{
        'LABEL': 'Source',
        'KEY': f'{DB}://{CLUSTER}.{SCHEMA}/{TABLE}/_source',
        'source': SOURCE,
        'source_type': 'github'
    }]
    self.assertEqual(expected_nodes, serialized_nodes)
def test_dashboard_timestamp_nodes(self) -> None:
    """Check the single Timestamp node emitted by ``self.dashboard_last_modified``.

    The first call to ``create_next_node`` must produce the timestamp node
    and the second must return ``None``.
    """
    actual = self.dashboard_last_modified.create_next_node()
    # Guard before use: a missing node should fail here, on the assertion,
    # not later inside the serializer.
    assert actual is not None
    actual_serialized = neo4_serializer.serialize_node(actual)

    expected: Dict[str, Any] = {
        'timestamp:UNQUOTED': 123456789,
        'name': 'last_updated_timestamp',
        'KEY': self.expected_ts_key,
        'LABEL': 'Timestamp'
    }
    self.assertDictEqual(actual_serialized, expected)

    # No further nodes after the timestamp node.
    self.assertIsNone(self.dashboard_last_modified.create_next_node())
def test_basic_example(self) -> None:
    """Serialize all nodes, then all relations, of ``self.gencode`` and compare with the fixtures."""
    serialized_nodes = []
    while True:
        node = self.gencode.next_node()
        if not node:
            break
        serialized_nodes.append(neo4_serializer.serialize_node(node))
    self.assertEqual(self.expected_nodes, serialized_nodes)

    serialized_rels = []
    while True:
        relation = self.gencode.next_relation()
        if not relation:
            break
        serialized_rels.append(neo4_serializer.serialize_relationship(relation))
    self.assertEqual(self.expected_rels, serialized_rels)
def test_create_nodes(self) -> None:
    """Serialize every node from ``self.query_join_metadata`` and expect one execution node."""
    serialized_nodes = []
    while True:
        execution_node = self.query_join_metadata.create_next_node()
        if not execution_node:
            break
        serialized_nodes.append(neo4_serializer.serialize_node(execution_node))

    expected_nodes = [{
        'LABEL': QueryExecutionsMetadata.NODE_LABEL,
        'KEY': self._expected_key,
        'execution_count:UNQUOTED': 7,
        'start_time:UNQUOTED': 10,
        'window_duration': 'daily'
    }]
    self.assertEqual(serialized_nodes, expected_nodes)