def _get_readers(self, qualified_name: str,
                 top: Optional[int] = 15) -> List[Reader]:
    """
    Look up the most frequent readers of a table through a basic search.

    The search targets entities of ``self.READER_TYPE`` whose ``self.QN_KEY``
    starts with the part of ``qualified_name`` before the first ``'@'``
    followed by a dot, and whose ``count`` is at least the configured
    ``POPULAR_TABLE_MINIMUM_READER_COUNT``. Results are requested sorted by
    ``count`` in descending order.

    :param qualified_name: Qualified name of the table.
    :param top: Value sent as the search ``limit``.
    :return: A list of Reader objects, empty when the search finds nothing.
    """
    # Reader qualified names are matched on the "<name before @>." prefix.
    prefix_criterion = {
        'attributeName': self.QN_KEY,
        'operator': 'STARTSWITH',
        'attributeValue': qualified_name.split('@')[0] + '.'
    }
    minimum_count_criterion = {
        'attributeName': 'count',
        'operator': 'gte',
        'attributeValue': f'{app.config["POPULAR_TABLE_MINIMUM_READER_COUNT"]}'
    }
    query = {
        'typeName': self.READER_TYPE,
        'offset': '0',
        'limit': top,
        'excludeDeletedEntities': True,
        'entityFilters': {
            'condition': 'AND',
            'criterion': [prefix_criterion, minimum_count_criterion]
        },
        'attributes': ['count', self.QN_KEY],
        'sortBy': 'count',
        'sortOrder': 'DESCENDING'
    }

    found = self._driver.search_basic.create(data=query,
                                             ignoreRelationships=False)
    reader_guids = [hit.guid for hit in found.entities]
    if not reader_guids:
        return []

    # The search hits only carry GUIDs; fetch the full entities to reach the
    # 'user' relationship and the 'count' attribute.
    bulk_response = self._driver.entity_bulk(guid=reader_guids,
                                             ignoreRelationships=False)
    collected = []
    for reader_entity in extract_entities(bulk_response):
        user_name = reader_entity.relationshipAttributes['user']['displayText']
        # Fall back to a minimal user built from the display text when the
        # detail lookup returns nothing.
        user_fields = self.user_detail_method(user_name) or {
            'email': user_name,
            'user_id': user_name
        }
        collected.append(Reader(user=User(**user_fields),
                                read_count=reader_entity.attributes['count']))
    return collected
def _get_resources_owned_by_user(
        self, user_id: str, resource_type: str
) -> List[Union[PopularTable, DashboardSummary, Any]]:
    """
    ToDo (Verdan): Dashboard still needs to be implemented.
    Helper function to get the resource, table, dashboard etc owned by a user.

    Only resources of type ``self.TABLE_ENTITY`` are serialized; any other
    ``resource_type`` yields an empty list.

    :param user_id: User ID of a user
    :param resource_type: Type of a resource that returns, could be table, dashboard etc.
    :return: A list of PopularTable, DashboardSummary or any other resource.
    :raises NotFoundException: when no user entity exists for ``user_id``.
    """
    user_entity = self._driver.entity_unique_attribute(
        self.USER_TYPE, qualifiedName=user_id).entity

    if not user_entity:
        # Not inside an exception handler, so log a plain error rather than
        # LOGGER.exception (which would append a meaningless traceback).
        LOGGER.error(f'User ({user_id}) not found in Atlas')
        raise NotFoundException(f'User {user_id} not found.')

    owned_items = user_entity[self.REL_ATTRS_KEY].get('ownerOf') or []
    resource_guids = [
        item[self.GUID_KEY]
        for item in owned_items
        if (item['entityStatus'] == Status.ACTIVE
            and item['relationshipStatus'] == Status.ACTIVE
            and item['typeName'] == resource_type)
    ]

    # Nothing owned: skip the bulk request instead of sending it without GUIDs.
    if not resource_guids:
        return []

    entities = extract_entities(
        self._driver.entity_bulk(guid=resource_guids,
                                 ignoreRelationships=True))

    if resource_type == self.TABLE_ENTITY:
        return self._serialize_popular_tables(entities)
    return []
def _get_readers(self, entity: EntityUniqueAttribute,
                 top: Optional[int] = 15) -> List[Reader]:
    """
    Build the list of top readers from an entity's 'readers' relationship.

    Reader references are kept only when both the entity and the
    relationship are active. The full reader entities are then fetched, and
    those whose ``count`` is below the configured
    ``POPULAR_TABLE_MINIMUM_READER_COUNT`` are dropped.

    :param entity: Entity whose ``relationshipAttributes`` hold the readers.
    :param top: Maximum number of readers to return.
    :return: Readers sorted by read count, highest first, cut to ``top``.
    """
    relationship_attrs = entity.get('relationshipAttributes', dict())

    active_guids = []
    for reference in relationship_attrs.get('readers', list()):
        # A missing status counts as inactive.
        is_active = (
            reference.get('entityStatus', 'INACTIVE') == Status.ACTIVE
            and reference.get('relationshipStatus', 'INACTIVE') == Status.ACTIVE
        )
        if is_active:
            active_guids.append(reference.get('guid'))

    if not active_guids:
        return []

    bulk_response = self._driver.entity_bulk(guid=active_guids,
                                             ignoreRelationships=False)

    qualifying = []
    for reader_entity in extract_entities(bulk_response):
        reads = reader_entity.attributes['count']
        if reads < int(app.config['POPULAR_TABLE_MINIMUM_READER_COUNT']):
            continue
        user_name = reader_entity.relationshipAttributes['user']['displayText']
        user_fields = self._get_user_details(user_name)
        qualifying.append(Reader(user=User(**user_fields), read_count=reads))

    qualifying.sort(key=attrgetter('read_count'), reverse=True)
    return qualifying[:top]
def _get_resources_owned_by_user(
        self, user_id: str, resource_type: str
) -> List[Union[PopularTable, DashboardSummary, Any]]:
    """
    ToDo (Verdan): Dashboard still needs to be implemented.
    Helper function to get the resource, table, dashboard etc owned by a user.

    Ownership is gathered from two places and merged: the user's 'owns'
    relationship, and a basic search for table entities whose 'owner'
    attribute starts with the lower-cased ``user_id``.

    :param user_id: User ID of a user
    :param resource_type: Type of a resource that returns, could be table, dashboard etc.
    :return: A list of PopularTable, DashboardSummary or any other resource.
    :raises NotImplementedError: when ``resource_type`` is not supported yet.
    :raises NotFoundException: when no user entity exists for ``user_id``.
    """
    resources = list()

    if resource_type == ResourceType.Table.name:
        # Compiled once here rather than on every loop iteration below.
        type_pattern = re.compile("(.*)_table$")
    # TODO: add a branch for ResourceType.Dashboard once it is implemented.
    else:
        message = f'Resource Type ({resource_type}) is not yet implemented'
        LOGGER.error(message)
        # NotImplemented is a comparison sentinel, not an exception; raising
        # it produces an unrelated TypeError.
        raise NotImplementedError(message)

    user_entity = self._driver.entity_unique_attribute(
        self.USER_TYPE, qualifiedName=user_id).entity

    if not user_entity:
        LOGGER.error(f'User ({user_id}) not found in Atlas')
        raise NotFoundException(f'User {user_id} not found.')

    # A set, because the same resource can show up in both sources.
    resource_guids = set()
    for item in user_entity[self.REL_ATTRS_KEY].get('owns') or list():
        if (item['entityStatus'] == Status.ACTIVE
                and item['relationshipStatus'] == Status.ACTIVE
                and type_pattern.match(item['typeName'])):
            resource_guids.add(item[self.GUID_KEY])

    params = {
        'typeName': self.TABLE_ENTITY,
        'excludeDeletedEntities': True,
        'entityFilters': {
            'condition': 'AND',
            'criterion': [
                {
                    'attributeName': 'owner',
                    'operator': 'startsWith',
                    'attributeValue': user_id.lower()
                }
            ]
        },
        'attributes': [self.GUID_KEY]
    }
    table_entities = self._driver.search_basic.create(data=params)
    for table in table_entities.entities:
        resource_guids.add(table.guid)

    if resource_guids:
        entities = extract_entities(
            self._driver.entity_bulk(guid=list(resource_guids),
                                     ignoreRelationships=True))
        if resource_type == ResourceType.Table.name:
            resources = self._serialize_popular_tables(entities)
    else:
        LOGGER.info(f'User ({user_id}) does not own any "{resource_type}"')

    return resources
def _get_reports(self, guids: List[str]) -> List[ResourceReport]:
    """
    Fetch report entities by GUID and convert the active ones.

    Entities whose ``status`` differs from ``self.ENTITY_ACTIVE_STATUS`` are
    skipped. An entity that lacks the expected attributes is logged and
    skipped as well, without interrupting the remaining ones. When
    ``RESOURCE_REPORT_CLIENT`` is set in the app config, the collected
    reports are passed through it before being returned.

    :param guids: GUIDs of the report entities; nothing is fetched if empty.
    :return: The reports, or whatever the configured client returns for them.
    """
    collected = []

    if guids:
        bulk_response = self._driver.entity_bulk(guid=guids)
        for entity in extract_entities(bulk_response):
            try:
                if entity.status != self.ENTITY_ACTIVE_STATUS:
                    continue
                attrs = entity.attributes
                collected.append(
                    ResourceReport(name=attrs['name'], url=attrs['url']))
            except (KeyError, AttributeError) as ex:
                LOGGER.exception(
                    'Error while accessing table report: {}. {}'.format(
                        str(entity), str(ex)))

    report_client = app.config['RESOURCE_REPORT_CLIENT']
    if report_client:
        return report_client(collected)
    return collected
def get_frequently_used_tables(
        self, *, user_email: str) -> Dict[str, List[PopularTable]]:
    """
    Return the tables a user has read, ordered by read count.

    The user's 'entityReads' relationship supplies the reader entities; each
    reader with a non-zero ``count`` contributes one table, described by the
    parts extracted from the reader's entity URI.

    :param user_email: Qualified name used to look up the user entity.
    :return: ``{'table': [...]}`` with PopularTable objects in ascending
        order of read count; tables with equal counts keep the order in
        which they were returned.
    """
    user = self._driver.entity_unique_attribute(
        self.USER_TYPE, qualifiedName=user_email).entity

    # The relationship can be missing or null when the user has read nothing.
    entity_reads = user['relationshipAttributes'].get('entityReads') or []
    readers_guids = [
        user_reads['guid']
        for user_reads in entity_reads
        if user_reads['entityStatus'] == 'ACTIVE'
        and user_reads['relationshipStatus'] == 'ACTIVE'
    ]

    # Nothing to look up: avoid a bulk request that carries no GUIDs.
    if not readers_guids:
        return {'table': []}

    readers = extract_entities(
        self._driver.entity_bulk(guid=readers_guids,
                                 ignoreRelationships=True))

    # Collected as (count, fields) pairs rather than a dict keyed by count,
    # so two tables with the same read count do not overwrite each other.
    counted_tables = []
    for reader in readers:
        count = reader.attributes.get('count')
        if not count:
            continue

        entity_uri = reader.attributes.get(self.ENTITY_URI_KEY)
        details = self._extract_info_from_uri(table_uri=entity_uri)
        counted_tables.append((count, dict(cluster=details.get('cluster'),
                                           name=details.get('name'),
                                           schema=details.get('db'),
                                           database=details.get('entity'))))

    # list.sort is stable, so equal counts keep their encounter order.
    counted_tables.sort(key=lambda pair: pair[0])

    results = [PopularTable(**fields) for _, fields in counted_tables]
    return {'table': results}
def _get_table_watermarks(
        self, entity: EntityUniqueAttribute) -> List[Watermark]:
    """
    Derive the low and high watermark of a table from its partitions.

    Active partitions are fetched in bulk, a date format is selected from
    their names, and each partition whose name validates against that format
    is indexed by its parsed date. The earliest date becomes the low
    watermark and the latest the high watermark.

    :param entity: Table entity whose ``relationshipAttributes`` hold the
        partitions.
    :return: ``[low_watermark, high_watermark]``, or an empty list when no
        partition is active or none yields a valid date.
    """
    value_format = '%Y-%m-%d %H:%M:%S'

    partition_refs = entity.get('relationshipAttributes',
                                dict()).get('partitions', list())
    active_guids = []
    for reference in partition_refs:
        if (reference.get('entityStatus') == Status.ACTIVE
                and reference.get('relationshipStatus') == Status.ACTIVE):
            active_guids.append(reference.get('guid'))

    if not active_guids:
        return []

    key_name = AtlasProxy._render_partition_key_name(entity)

    bulk_response = self._driver.entity_bulk(guid=list(active_guids),
                                             ignoreRelationships=True)
    full_partitions = extract_entities(bulk_response)

    partition_names = [item.attributes.get('name') for item in full_partitions]
    date_format = AtlasProxy._select_watermark_format(partition_names)

    # Keyed by parsed date so min()/max() below pick the watermarks directly.
    by_date = {}
    for partition in full_partitions:
        name = partition.attributes.get('name')
        if not (name and date_format):
            continue

        parsed_date, _ = AtlasProxy._validate_date(name, date_format)
        if not parsed_date:
            continue

        # Fall back to 0.0 when the creation time cannot be parsed.
        created_timestamp = self._parse_date(partition.createTime) or 0.0
        created_at = datetime.datetime.fromtimestamp(created_timestamp)

        by_date[parsed_date] = {
            'partition_value': datetime.datetime.strftime(parsed_date,
                                                          value_format),
            'create_time': created_at.strftime(value_format),
            'partition_key': key_name
        }

    if not by_date:
        return []

    earliest = min(by_date.keys())
    latest = max(by_date.keys())
    return [
        Watermark(watermark_type='low_watermark', **by_date.get(earliest)),
        Watermark(watermark_type='high_watermark', **by_date.get(latest)),
    ]