示例#1
0
    def test_get_table_view_only(self) -> None:
        """
        Check that get_table returns a Table with is_view=True when every
        column usage record handed back by the mocked Cypher query has
        tbl.is_view set to True.
        """
        # Work on a deep copy so self.col_usage_return_value itself is not modified.
        view_records = copy.deepcopy(self.col_usage_return_value)
        for record in view_records:
            record['tbl']['is_view'] = True

        # The two expected watermarks differ only in their type.
        expected_watermarks = [
            Watermark(watermark_type=watermark_type,
                      partition_key='ds',
                      partition_value='fake_value',
                      create_time='fake_time')
            for watermark_type in ('high_watermark', 'low_watermark')
        ]

        # (name, description, type, sort order, epoch / stat value seed) per expected column.
        column_specs = [
            ('bar_id_1', 'bar col description', 'varchar', 0, 1),
            ('bar_id_2', 'bar col2 description', 'bigint', 1, 2),
        ]
        expected_columns = [
            Column(name=col_name, description=col_description, col_type=col_type,
                   sort_order=order, stats=[Statistics(start_epoch=epoch,
                                                       end_epoch=epoch,
                                                       stat_type='avg',
                                                       stat_val=str(epoch))])
            for col_name, col_description, col_type, order, epoch in column_specs
        ]

        writer = self.table_writer
        expected_writer = Application(application_url=writer['application_url'],
                                      description=writer['description'],
                                      name=writer['name'],
                                      id=writer['id'])

        expected_descriptions = [
            ProgrammaticDescription(source='quality_report',
                                    text='Test Test'),
            ProgrammaticDescription(source='s3_crawler',
                                    text='Test Test Test'),
        ]

        with patch.object(GraphDatabase, 'driver'), patch.object(Neo4jProxy, '_execute_cypher_query') as mock_execute:
            # One canned result per successive _execute_cypher_query call.
            mock_execute.side_effect = [view_records, [], self.table_level_return_value]

            proxy = Neo4jProxy(host='DOES_NOT_MATTER', port=0000)
            table = proxy.get_table(table_uri='dummy_uri')

            expected = Table(database='hive', cluster='gold', schema='foo_schema', name='foo_table',
                             tags=[Tag(tag_name='test', tag_type='default')],
                             badges=[Tag(tag_name='golden', tag_type='badge')],
                             table_readers=[], description='foo description',
                             watermarks=expected_watermarks,
                             columns=expected_columns,
                             owners=[User(email='*****@*****.**')],
                             table_writer=expected_writer,
                             last_updated_timestamp=1,
                             source=Source(source='/source_file_loc',
                                           source_type='github'),
                             is_view=True,
                             programmatic_descriptions=expected_descriptions)

            # Compare the string renderings of the expected and returned tables.
            self.assertEqual(str(expected), str(table))
示例#2
0
    def test_get_table(self) -> None:
        """
        Check that get_table returns a Table whose string rendering matches
        the one assembled from the entity, column and column-metadata fixtures.
        """
        self._mock_get_table_entity()
        response = self.proxy.get_table(table_uri=self.table_uri)

        tag_name = self.classification_entity['classifications'][0]['typeName']
        table_attrs = cast(dict, self.entity1['attributes'])

        column_attrs = cast(dict, self.test_column['attributes'])
        metadata_attrs = cast(dict, self.column_metadata_entity['attributes'])

        # One Statistics model per statistics entry of the column metadata fixture.
        expected_stats = [
            Statistics(
                stat_type=stat_attrs['stat_name'],
                stat_val=stat_attrs['stat_val'],
                start_epoch=stat_attrs['start_epoch'],
                end_epoch=stat_attrs['end_epoch'],
            )
            for stat_attrs in (entry['attributes'] for entry in metadata_attrs['statistics'])
        ]

        expected_column = Column(name=column_attrs['name'],
                                 description='column description',
                                 col_type='Managed',
                                 sort_order=column_attrs['position'],
                                 stats=expected_stats)

        expected = Table(database=self.entity_type,
                         cluster=self.cluster,
                         schema=self.db,
                         name=table_attrs['name'],
                         tags=[Tag(tag_name=tag_name, tag_type="default")],
                         description=table_attrs['description'],
                         owners=[User(email=table_attrs['owner'])],
                         columns=[expected_column],
                         last_updated_timestamp=cast(int, self.entity1['updateTime']))

        # Compare the string renderings of the expected and returned tables.
        self.assertEqual(str(expected), str(response))
示例#3
0
    def _get_table(self, custom_stats_format: bool = False) -> None:
        """
        Shared assertion helper: call get_table against mocked entities and
        compare the result with the expected Table.
        :param custom_stats_format: when True the expected column statistics
        are taken from self.test_exp_col_stats_formatted, otherwise from
        self.test_exp_col_stats_raw.
        """
        raw_expected_stats = (self.test_exp_col_stats_formatted
                              if custom_stats_format
                              else self.test_exp_col_stats_raw)
        table_attrs = cast(dict, self.entity1['attributes'])

        # Set up the mocked entities, owners lookup and bulk report retrieval.
        self._mock_get_table_entity()
        self._create_mocked_report_entities_collection()
        mocked_owners = [User(email=table_attrs['owner'])]
        self.proxy._get_owners = MagicMock(return_value=mocked_owners)  # type: ignore
        self.proxy._driver.entity_bulk = MagicMock(
            return_value=self.report_entity_collection)

        response = self.proxy.get_table(table_uri=self.table_uri)

        tag_name = self.classification_entity['classifications'][0]['typeName']
        column_attrs = cast(dict, self.test_column['attributes'])

        # One Statistics model per expected statistics entry.
        expected_stats = [
            Statistics(
                stat_type=stat_attrs['stat_name'],
                stat_val=stat_attrs['stat_val'],
                start_epoch=stat_attrs['start_epoch'],
                end_epoch=stat_attrs['end_epoch'],
            )
            for stat_attrs in (entry['attributes'] for entry in raw_expected_stats)
        ]

        expected_column = Column(name=column_attrs['name'],
                                 description='column description',
                                 col_type='Managed',
                                 sort_order=column_attrs['position'],
                                 stats=expected_stats)

        expected_reports = [
            ResourceReport(name='test_report', url='http://test'),
            ResourceReport(name='test_report3', url='http://test3'),
        ]
        expected_descriptions = [
            ProgrammaticDescription(source='test parameter key a',
                                    text='testParameterValueA'),
            ProgrammaticDescription(source='test parameter key b',
                                    text='testParameterValueB'),
        ]
        # Only the first 10 characters of the stringified updateTime are used.
        expected_timestamp = int(str(self.entity1['updateTime'])[:10])

        expected = Table(
            database=self.entity_type,
            cluster=self.cluster,
            schema=self.db,
            name=table_attrs['name'],
            tags=[Tag(tag_name=tag_name, tag_type="default")],
            description=table_attrs['description'],
            owners=[User(email=table_attrs['owner'])],
            resource_reports=expected_reports,
            last_updated_timestamp=expected_timestamp,
            # The same expected column is repeated once per active column.
            columns=[expected_column] * self.active_columns,
            programmatic_descriptions=expected_descriptions,
            is_view=False)

        # Compare the string renderings of the expected and returned tables.
        self.assertEqual(str(expected), str(response))
    def _exec_col_query(self, table_uri: str) -> Tuple:
        """
        Run the column-level Cypher query for one table and turn the result
        into Column models.
        :param table_uri: value bound to the $tbl_key query parameter
        :return: tuple of (columns sorted by sort_order, last record consumed
        from the query result)
        :raises NotFoundException: when the query produces no column records
        """
        column_level_query = textwrap.dedent("""
        MATCH (db:Database)-[:CLUSTER]->(clstr:Cluster)-[:SCHEMA]->(schema:Schema)
        -[:TABLE]->(tbl:Table {key: $tbl_key})-[:COLUMN]->(col:Column)
        OPTIONAL MATCH (tbl)-[:DESCRIPTION]->(tbl_dscrpt:Description)
        OPTIONAL MATCH (col:Column)-[:DESCRIPTION]->(col_dscrpt:Description)
        OPTIONAL MATCH (col:Column)-[:STAT]->(stat:Stat)
        RETURN db, clstr, schema, tbl, tbl_dscrpt, col, col_dscrpt, collect(distinct stat) as col_stats
        ORDER BY col.sort_order;""")

        records = self._execute_cypher_query(
            statement=column_level_query, param_dict={'tbl_key': table_uri})

        columns = []
        # The last record is captured while iterating because, per the original
        # note, random access on a Neo4j result is an O(n) operation.
        final_record = None
        for record in records:
            # Epochs are parsed via float first, then truncated to int.
            stats = [
                Statistics(stat_type=stat['stat_name'],
                           stat_val=stat['stat_val'],
                           start_epoch=int(float(stat['start_epoch'])),
                           end_epoch=int(float(stat['end_epoch'])))
                for stat in record['col_stats']
            ]

            final_record = record
            column_node = record['col']
            columns.append(
                Column(name=column_node['name'],
                       description=self._safe_get(record, 'col_dscrpt', 'description'),
                       col_type=column_node['type'],
                       sort_order=int(column_node['sort_order']),
                       stats=stats))

        if len(columns) == 0:
            message = 'Table URI( {table_uri} ) does not exist'.format(
                table_uri=table_uri)
            raise NotFoundException(message)

        ordered_columns = sorted(columns, key=lambda item: item.sort_order)
        return ordered_columns, final_record
示例#5
0
    def _serialize_columns(self, *, entity: EntityUniqueAttribute) -> \
            Union[List[Column], List]:
        """
        Helper function to fetch the columns from entity and serialize them
        using Column and Statistics model.
        :param entity: EntityUniqueAttribute object,
        along with relationshipAttributes
        :return: A list of Column objects sorted by sort_order (columns
        without a position are placed last), if there are any columns
        available, else an empty list.
        """
        referred_entities = entity.referredEntities
        columns = list()
        for column in entity.entity[self.REL_ATTRS_KEY].get(
                'columns') or list():
            col_entity = referred_entities[column['guid']]
            col_attrs = col_entity[self.ATTRS_KEY]
            col_rel_attrs = col_entity[self.REL_ATTRS_KEY]

            # The metadata relationship may be absent, or may reference an
            # entity that is not part of referredEntities; in both cases the
            # column is serialized without statistics instead of failing.
            metadata_ref = col_rel_attrs.get('metadata')
            metadata_entity = None
            if metadata_ref:
                metadata_entity = referred_entities.get(
                    metadata_ref.get('guid'))

            raw_statistics = list()
            if metadata_entity:
                raw_statistics = metadata_entity['attributes'].get(
                    'statistics') or list()

            statistics = list()
            for stats in raw_statistics:
                stats_attrs = stats['attributes']
                statistics.append(
                    Statistics(
                        stat_type=stats_attrs.get('stat_name'),
                        stat_val=stats_attrs.get('stat_val'),
                        start_epoch=stats_attrs.get('start_epoch'),
                        end_epoch=stats_attrs.get('end_epoch'),
                    ))

            columns.append(
                Column(
                    name=col_attrs.get('name'),
                    description=col_attrs.get('description')
                    or col_attrs.get('comment'),
                    col_type=col_attrs.get('type')
                    or col_attrs.get('dataType'),
                    sort_order=col_attrs.get('position'),
                    stats=statistics,
                ))

        # sort_order is None when the column has no 'position' attribute and
        # None cannot be ordered against int, so such columns are keyed to
        # sort after every positioned column. Positioned columns keep the
        # same relative order as a plain sort on sort_order.
        return sorted(
            columns,
            key=lambda item: (item.sort_order is None, item.sort_order or 0))
    def _serialize_columns(self, *, entity: EntityUniqueAttribute) -> \
            Union[List[Column], List]:
        """
        Helper function to fetch the columns from entity and serialize them
        using Column and Statistics model.

        Only columns whose entityStatus is 'active' (case-insensitive) are
        serialized. Each statistic is looked up by its stat_name in
        self.STATISTICS_FORMAT_SPEC; the matching entry may ask to drop the
        statistic ('drop'), rename it ('new_name') or render its value
        through a format string ('format'). Values without a format string
        are converted with str().
        :param entity: EntityUniqueAttribute object,
        along with relationshipAttributes
        :return: A list of Column objects sorted by sort_order, if there are
        any active columns available, else an empty list. Columns without a
        position get sort_order 9999.
        """
        columns = list()
        for column in entity.entity[self.REL_ATTRS_KEY].get(
                'columns') or list():
            # A missing or None entityStatus is treated as inactive.
            column_status = (column.get('entityStatus') or 'inactive').lower()

            if column_status != 'active':
                continue

            col_entity = entity.referredEntities[column[self.GUID_KEY]]
            col_attrs = col_entity[self.ATTRS_KEY]
            statistics = list()

            for stats in col_attrs.get('statistics') or list():
                stats_attrs = stats['attributes']

                stat_type = stats_attrs.get('stat_name')

                stat_format = self.STATISTICS_FORMAT_SPEC.get(
                    stat_type, dict())

                if stat_format.get('drop', False):
                    continue

                stat_type = stat_format.get('new_name', stat_type)

                stat_val = stats_attrs.get('stat_val')
                format_val = stat_format.get('format')
                if format_val:
                    stat_val = format_val.format(stat_val)
                else:
                    stat_val = str(stat_val)

                statistics.append(
                    Statistics(
                        stat_type=stat_type,
                        stat_val=stat_val,
                        start_epoch=stats_attrs.get('start_epoch'),
                        end_epoch=stats_attrs.get('end_epoch'),
                    ))

            # Only a missing position falls back to 9999. A plain
            # `position or 9999` would also replace the valid position 0 and
            # push that column to the end of the sorted result.
            position = col_attrs.get('position')
            sort_order = 9999 if position is None else position

            columns.append(
                Column(
                    name=col_attrs.get('name'),
                    description=col_attrs.get('description')
                    or col_attrs.get('comment'),
                    col_type=col_attrs.get('type')
                    or col_attrs.get('dataType'),
                    sort_order=sort_order,
                    stats=statistics,
                ))
        return sorted(columns, key=lambda item: item.sort_order)