class TestBadge(unittest.TestCase):
    """Checks BadgeMetadata nodes and relations as serialized for Neo4j and Neptune."""

    def setUp(self) -> None:
        super().setUp()
        # Column-level badge metadata shared by the tests below.
        self.badge_metadata = BadgeMetadata(
            start_label='Column',
            start_key='hive://default.base/test/ds',
            badges=[badge1, badge2])

    def _expected_neo4j_node(self, badge) -> dict:
        """Build the Neo4j-serialized node expected for ``badge``."""
        return {
            NODE_KEY: BadgeMetadata.BADGE_KEY_FORMAT.format(badge=badge.name),
            NODE_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
            BadgeMetadata.BADGE_CATEGORY: badge.category
        }

    def _expected_neptune_node(self, badge) -> dict:
        """Build the Neptune bulk-loader row expected for the node of ``badge``."""
        return {
            NEPTUNE_HEADER_ID: BadgeMetadata.BADGE_KEY_FORMAT.format(badge=badge.name),
            NEPTUNE_HEADER_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
            # The extraction timestamp is not predictable, so any value is accepted.
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
            NEPTUNE_CREATION_TYPE_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB,
            BadgeMetadata.BADGE_CATEGORY + ':String(single)': badge.category
        }

    def _expected_neo4j_relation(self, badge) -> dict:
        """Build the Neo4j-serialized relation expected from the fixture entity to ``badge``."""
        return {
            RELATION_START_LABEL: self.badge_metadata.start_label,
            RELATION_END_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
            RELATION_START_KEY: self.badge_metadata.start_key,
            RELATION_END_KEY: BadgeMetadata.get_badge_key(badge.name),
            RELATION_TYPE: BadgeMetadata.BADGE_RELATION_TYPE,
            RELATION_REVERSE_TYPE: BadgeMetadata.INVERSE_BADGE_RELATION_TYPE,
        }

    def _expected_neptune_edge(self, from_key, to_key, label) -> dict:
        """Build the Neptune bulk-loader row expected for one directed edge."""
        return {
            NEPTUNE_HEADER_ID: "{from_vertex_id}_{to_vertex_id}_{label}".format(
                from_vertex_id=from_key,
                to_vertex_id=to_key,
                label=label,
            ),
            NEPTUNE_RELATIONSHIP_HEADER_FROM: from_key,
            NEPTUNE_RELATIONSHIP_HEADER_TO: to_key,
            NEPTUNE_HEADER_LABEL: label,
            # The extraction timestamp is not predictable, so any value is accepted.
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
            NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB
        }

    def test_get_badge_key(self) -> None:
        badge_key = self.badge_metadata.get_badge_key(badge1.name)
        self.assertEqual(badge_key, badge1.name)

    def test_create_nodes(self) -> None:
        nodes = self.badge_metadata.create_nodes()
        self.assertEqual(len(nodes), 2)

        serialized_nodes = [
            neo4_serializer.serialize_node(node)
            for node in nodes
        ]

        # assertIn reports the missing dict and the container on failure.
        for badge in (badge1, badge2):
            self.assertIn(self._expected_neo4j_node(badge), serialized_nodes)

    def test_create_nodes_neptune(self) -> None:
        nodes = self.badge_metadata.create_nodes()
        serialized_nodes = [
            neptune_serializer.convert_node(node)
            for node in nodes
        ]

        for badge in (badge1, badge2):
            self.assertIn(self._expected_neptune_node(badge), serialized_nodes)

    def test_bad_key_entity_match(self) -> None:
        # A 'Column' label paired with a key lacking the trailing column
        # segment of the setUp key is expected to be rejected.
        column_label = 'Column'
        table_key = 'hive://default.base/test'

        self.assertRaises(Exception,
                          BadgeMetadata,
                          start_label=column_label,
                          start_key=table_key,
                          badges=[badge1, badge2])

    def test_bad_entity_label(self) -> None:
        # A 'User' start label is expected to be rejected.
        user_label = 'User'
        table_key = 'hive://default.base/test'

        self.assertRaises(Exception,
                          BadgeMetadata,
                          start_label=user_label,
                          start_key=table_key,
                          badges=[badge1, badge2])

    def test_create_relation(self) -> None:
        relations = self.badge_metadata.create_relation()
        serialized_relations = [
            neo4_serializer.serialize_relationship(relation)
            for relation in relations
        ]
        self.assertEqual(len(relations), 2)

        for badge in (badge1, badge2):
            self.assertIn(self._expected_neo4j_relation(badge), serialized_relations)

    def test_create_relation_neptune(self) -> None:
        relations = self.badge_metadata.create_relation()
        # Flatten the per-relation row collections into one list. A nested
        # comprehension avoids the repeated list copying of sum(..., []).
        serialized_relations: List[Dict] = [
            row
            for relation in relations
            for row in neptune_serializer.convert_relationship(relation)
        ]

        start_key = self.badge_metadata.start_key
        for badge in (badge1, badge2):
            badge_key = BadgeMetadata.get_badge_key(badge.name)
            # Both the forward and the reversed edge must be present.
            self.assertIn(
                self._expected_neptune_edge(start_key, badge_key, BadgeMetadata.BADGE_RELATION_TYPE),
                serialized_relations)
            self.assertIn(
                self._expected_neptune_edge(badge_key, start_key, BadgeMetadata.INVERSE_BADGE_RELATION_TYPE),
                serialized_relations)
class TestBadge(unittest.TestCase):
    """Checks BadgeMetadata nodes and relations as serialized for Neo4j."""

    def setUp(self) -> None:
        super().setUp()
        # Column-level badge metadata shared by the tests below.
        self.badge_metadata = BadgeMetadata(
            start_label='Column',
            start_key='hive://default.base/test/ds',
            badges=[badge1, badge2])

    def _expected_node(self, badge) -> dict:
        """Build the serialized node expected for ``badge``."""
        return {
            NODE_KEY: BadgeMetadata.BADGE_KEY_FORMAT.format(badge=badge.name),
            NODE_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
            BadgeMetadata.BADGE_CATEGORY: badge.category
        }

    def _expected_relation(self, badge) -> dict:
        """Build the serialized relation expected from the fixture entity to ``badge``."""
        return {
            RELATION_START_LABEL: self.badge_metadata.start_label,
            RELATION_END_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
            RELATION_START_KEY: self.badge_metadata.start_key,
            RELATION_END_KEY: BadgeMetadata.get_badge_key(badge.name),
            RELATION_TYPE: BadgeMetadata.BADGE_RELATION_TYPE,
            RELATION_REVERSE_TYPE: BadgeMetadata.INVERSE_BADGE_RELATION_TYPE,
        }

    def test_get_badge_key(self) -> None:
        badge_key = self.badge_metadata.get_badge_key(badge1.name)
        self.assertEqual(badge_key, badge1.name)

    def test_create_nodes(self) -> None:
        nodes = self.badge_metadata.create_nodes()
        self.assertEqual(len(nodes), 2)

        serialized_nodes = [
            neo4_serializer.serialize_node(node)
            for node in nodes
        ]

        # assertIn reports the missing dict and the container on failure.
        for badge in (badge1, badge2):
            self.assertIn(self._expected_node(badge), serialized_nodes)

    def test_bad_key_entity_match(self) -> None:
        # A 'Column' label paired with a key lacking the trailing column
        # segment of the setUp key is expected to be rejected.
        column_label = 'Column'
        table_key = 'hive://default.base/test'

        self.assertRaises(Exception,
                          BadgeMetadata,
                          start_label=column_label,
                          start_key=table_key,
                          badges=[badge1, badge2])

    def test_bad_entity_label(self) -> None:
        # A 'User' start label is expected to be rejected.
        user_label = 'User'
        table_key = 'hive://default.base/test'

        self.assertRaises(Exception,
                          BadgeMetadata,
                          start_label=user_label,
                          start_key=table_key,
                          badges=[badge1, badge2])

    def test_create_relation(self) -> None:
        relations = self.badge_metadata.create_relation()
        serialized_relations = [
            neo4_serializer.serialize_relationship(relation)
            for relation in relations
        ]
        self.assertEqual(len(relations), 2)

        for badge in (badge1, badge2):
            self.assertIn(self._expected_relation(badge), serialized_relations)
def _create_next_relation(self) -> Iterator[GraphRelationship]:
    """
    Lazily produce every relationship belonging to this table.

    Emission order: schema -> table, table -> description (when set),
    table -> tag for each tag, then for each column: table -> column,
    column -> description (when set) and the column's badge relations.
    The database -> cluster and cluster -> schema relationships come last
    and are emitted only when their (start_key, end_key, type) triple has
    not already been recorded in TableMetadata.serialized_rels_keys.
    """
    yield GraphRelationship(
        start_key=self._get_schema_key(),
        start_label=TableMetadata.SCHEMA_NODE_LABEL,
        end_key=self._get_table_key(),
        end_label=TableMetadata.TABLE_NODE_LABEL,
        type=TableMetadata.SCHEMA_TABLE_RELATION_TYPE,
        reverse_type=TableMetadata.TABLE_SCHEMA_RELATION_TYPE,
        attributes={},
    )

    if self.description:
        yield self.description.get_relation(
            TableMetadata.TABLE_NODE_LABEL,
            self._get_table_key(),
            self._get_table_description_key(self.description),
        )

    # A falsy tags value simply yields no tag relationships.
    for tag_name in self.tags or ():
        yield GraphRelationship(
            start_label=TableMetadata.TABLE_NODE_LABEL,
            start_key=self._get_table_key(),
            end_label=TagMetadata.TAG_NODE_LABEL,
            end_key=TagMetadata.get_tag_key(tag_name),
            type=TableMetadata.TABLE_TAG_RELATION_TYPE,
            reverse_type=TableMetadata.TAG_TABLE_RELATION_TYPE,
            attributes={},
        )

    for column in self.columns:
        yield GraphRelationship(
            start_label=TableMetadata.TABLE_NODE_LABEL,
            start_key=self._get_table_key(),
            end_label=ColumnMetadata.COLUMN_NODE_LABEL,
            end_key=self._get_col_key(column),
            type=TableMetadata.TABLE_COL_RELATION_TYPE,
            reverse_type=TableMetadata.COL_TABLE_RELATION_TYPE,
            attributes={},
        )

        if column.description:
            yield column.description.get_relation(
                ColumnMetadata.COLUMN_NODE_LABEL,
                self._get_col_key(column),
                self._get_col_description_key(column, column.description),
            )

        if column.badges:
            column_badges = BadgeMetadata(
                start_label=ColumnMetadata.COLUMN_NODE_LABEL,
                start_key=self._get_col_key(column),
                badges=column.badges,
            )
            yield from column_badges.create_relation()

    # These two relationships are shared between tables, hence the
    # class-level registry check before each one is emitted.
    shared_relationships = [
        GraphRelationship(
            start_label=TableMetadata.DATABASE_NODE_LABEL,
            end_label=TableMetadata.CLUSTER_NODE_LABEL,
            start_key=self._get_database_key(),
            end_key=self._get_cluster_key(),
            type=TableMetadata.DATABASE_CLUSTER_RELATION_TYPE,
            reverse_type=TableMetadata.CLUSTER_DATABASE_RELATION_TYPE,
            attributes={},
        ),
        GraphRelationship(
            start_label=TableMetadata.CLUSTER_NODE_LABEL,
            end_label=TableMetadata.SCHEMA_NODE_LABEL,
            start_key=self._get_cluster_key(),
            end_key=self._get_schema_key(),
            type=TableMetadata.CLUSTER_SCHEMA_RELATION_TYPE,
            reverse_type=TableMetadata.SCHEMA_CLUSTER_RELATION_TYPE,
            attributes={},
        ),
    ]
    for shared in shared_relationships:
        identity = (shared.start_key, shared.end_key, shared.type)
        if identity in TableMetadata.serialized_rels_keys:
            continue
        TableMetadata.serialized_rels_keys.add(identity)
        yield shared
def _create_next_relation(self) -> Iterator[Any]:
    """
    Lazily produce every relation belonging to this table.

    Emission order: schema -> table, table -> description (when set),
    table -> tag for each tag, then for each column: table -> column,
    column -> description (when set) and the column's badge relations.
    The database -> cluster and cluster -> schema relations come last and
    are emitted only when the corresponding RelTuple has not already been
    recorded in TableMetadata.serialized_rels.
    """
    def as_relation(start_label: Any, end_label: Any, start_key: Any,
                    end_key: Any, rel_type: Any, reverse_type: Any) -> Any:
        # Single place that fixes the layout of a relation dict.
        return {
            RELATION_START_LABEL: start_label,
            RELATION_END_LABEL: end_label,
            RELATION_START_KEY: start_key,
            RELATION_END_KEY: end_key,
            RELATION_TYPE: rel_type,
            RELATION_REVERSE_TYPE: reverse_type,
        }

    yield as_relation(
        TableMetadata.SCHEMA_NODE_LABEL,
        TableMetadata.TABLE_NODE_LABEL,
        self._get_schema_key(),
        self._get_table_key(),
        TableMetadata.SCHEMA_TABLE_RELATION_TYPE,
        TableMetadata.TABLE_SCHEMA_RELATION_TYPE,
    )

    if self.description:
        yield self.description.get_relation(
            TableMetadata.TABLE_NODE_LABEL,
            self._get_table_key(),
            self._get_table_description_key(self.description),
        )

    # A falsy tags value simply yields no tag relations.
    for tag_name in self.tags or ():
        yield as_relation(
            TableMetadata.TABLE_NODE_LABEL,
            TagMetadata.TAG_NODE_LABEL,
            self._get_table_key(),
            TagMetadata.get_tag_key(tag_name),
            TableMetadata.TABLE_TAG_RELATION_TYPE,
            TableMetadata.TAG_TABLE_RELATION_TYPE,
        )

    for column in self.columns:
        yield as_relation(
            TableMetadata.TABLE_NODE_LABEL,
            ColumnMetadata.COLUMN_NODE_LABEL,
            self._get_table_key(),
            self._get_col_key(column),
            TableMetadata.TABLE_COL_RELATION_TYPE,
            TableMetadata.COL_TABLE_RELATION_TYPE,
        )

        if column.description:
            yield column.description.get_relation(
                ColumnMetadata.COLUMN_NODE_LABEL,
                self._get_col_key(column),
                self._get_col_description_key(column, column.description),
            )

        if column.badges:
            column_badges = BadgeMetadata(
                db_name=self._get_database_key(),
                schema=self._get_schema_key(),
                start_label=ColumnMetadata.COLUMN_NODE_LABEL,
                start_key=self._get_col_key(column),
                badges=column.badges,
                cluster=self._get_cluster_key(),
            )
            yield from column_badges.create_relation()

    # These two relations are shared between tables, hence the class-level
    # registry check before each one is emitted.
    shared_relations = [
        RelTuple(
            start_label=TableMetadata.DATABASE_NODE_LABEL,
            end_label=TableMetadata.CLUSTER_NODE_LABEL,
            start_key=self._get_database_key(),
            end_key=self._get_cluster_key(),
            type=TableMetadata.DATABASE_CLUSTER_RELATION_TYPE,
            reverse_type=TableMetadata.CLUSTER_DATABASE_RELATION_TYPE,
        ),
        RelTuple(
            start_label=TableMetadata.CLUSTER_NODE_LABEL,
            end_label=TableMetadata.SCHEMA_NODE_LABEL,
            start_key=self._get_cluster_key(),
            end_key=self._get_schema_key(),
            type=TableMetadata.CLUSTER_SCHEMA_RELATION_TYPE,
            reverse_type=TableMetadata.SCHEMA_CLUSTER_RELATION_TYPE,
        ),
    ]
    for shared in shared_relations:
        if shared in TableMetadata.serialized_rels:
            continue
        TableMetadata.serialized_rels.add(shared)
        yield as_relation(
            shared.start_label,
            shared.end_label,
            shared.start_key,
            shared.end_key,
            shared.type,
            shared.reverse_type,
        )