def get_workunits(self) -> Iterable[MetadataWorkUnit]:
    """Yield work units for Azure AD groups and users.

    The phases below run in a fixed order, each gated by ``self.config``:

    1. groups -- one work unit per snapshot returned by
       ``_map_azure_ad_groups`` for every batch of groups;
    2. group memberships -- a user-URN -> ``GroupMembershipClass`` mapping
       is built from the members of ``self.selected_azure_ad_groups``;
    3. users -- either only the users collected while walking the groups
       (``ingest_groups_users``), or every batch from
       ``_get_azure_ad_users`` (``ingest_users``).

    Groups or users whose URN cannot be derived are reported through
    ``self.report.report_failure`` and skipped.
    """
    # Phase 1: groups, processed batch by batch.
    if self.config.ingest_groups:
        for group_batch in self._get_azure_ad_groups():
            logger.info("Processing another groups batch...")
            for group_snapshot in self._map_azure_ad_groups(group_batch):
                event = MetadataChangeEvent(proposedSnapshot=group_snapshot)
                unit = MetadataWorkUnit(id=group_snapshot.urn, mce=event)
                self.report.report_workunit(unit)
                yield unit

    # Phase 2: group memberships, keyed by the member's DataHub user URN.
    memberships_by_user: Dict[str, GroupMembershipClass] = {}
    if (
        self.config.ingest_group_membership
        and len(self.selected_azure_ad_groups) > 0
    ):
        for group in self.selected_azure_ad_groups:
            group_urn = self._map_azure_ad_group_to_urn(group)
            if not group_urn:
                self.report.report_failure(
                    "azure_ad_group_mapping",
                    f"Failed to extract DataHub Group Name from Azure AD Group named {group.get('displayName')}. Skipping...",
                )
                continue

            for member_batch in self._get_azure_ad_group_users(group):
                # A falsy batch means there are no members to process in it.
                if not member_batch:
                    continue
                for member in member_batch:
                    user_urn = self._map_azure_ad_user_to_urn(member)
                    if not user_urn:
                        self.report.report_failure(
                            "azure_ad_user_mapping",
                            f"Failed to extract DataHub Username from Azure ADUser {member.get('displayName')}. Skipping...",
                        )
                        continue

                    # Remember the raw user so phase 3 can ingest group
                    # members without fetching every user.
                    self.azure_ad_groups_users.append(member)

                    # Extend the user's membership, creating it on first sight.
                    known_membership = memberships_by_user.get(user_urn)
                    if known_membership is None:
                        memberships_by_user[user_urn] = GroupMembershipClass(
                            groups=[group_urn]
                        )
                    else:
                        known_membership.groups.append(group_urn)

    # Phase 3a: only the users that belong to the groups found above.
    if (
        self.config.ingest_groups_users
        and self.config.ingest_group_membership
        and not self.config.ingest_users
    ):
        member_snapshots = self._map_azure_ad_users(self.azure_ad_groups_users)
        yield from self.ingest_ad_users(member_snapshots, memberships_by_user)

    # Phase 3b: every user, batch by batch.
    if self.config.ingest_users:
        for user_batch in self._get_azure_ad_users():
            user_snapshots = self._map_azure_ad_users(user_batch)
            yield from self.ingest_ad_users(user_snapshots, memberships_by_user)
def get_workunits(self) -> Iterable[MetadataWorkUnit]:
    """Yield work units for Azure AD groups and users.

    The phases below run in a fixed order, each gated by ``self.config``:

    1. groups -- one work unit per snapshot returned by
       ``_map_azure_ad_groups`` for every batch of groups;
    2. group memberships -- for each entry of
       ``self.selected_azure_ad_groups`` the members are folded into a
       user-URN -> ``GroupMembershipClass`` mapping by
       ``_add_group_members_to_group_membership``;
    3. users -- either only the users in ``self.azure_ad_groups_users``
       (``ingest_groups_users``), or every batch from
       ``_get_azure_ad_users`` (``ingest_users``).

    Groups whose URN cannot be derived are reported through
    ``self.report.report_failure`` and skipped.
    """
    # Phase 1: groups, processed batch by batch.
    if self.config.ingest_groups:
        for group_batch in self._get_azure_ad_groups():
            logger.info("Processing another groups batch...")
            for group_snapshot in self._map_azure_ad_groups(group_batch):
                event = MetadataChangeEvent(proposedSnapshot=group_snapshot)
                unit = MetadataWorkUnit(id=group_snapshot.urn, mce=event)
                self.report.report_workunit(unit)
                yield unit

    # Phase 2: group memberships. Missing keys start out as an empty
    # membership so callers can append without checking for presence.
    memberships_by_user: Dict[str, GroupMembershipClass] = defaultdict(
        lambda: GroupMembershipClass(groups=[])
    )
    if (
        self.config.ingest_group_membership
        and len(self.selected_azure_ad_groups) > 0
    ):
        for group in self.selected_azure_ad_groups:
            # Azure supports nested groups, but DataHub does not, so nested
            # groups have to be exploded into a flat list.
            group_urn = self._map_azure_ad_group_to_urn(group)
            if group_urn:
                self._add_group_members_to_group_membership(
                    group_urn,
                    group,
                    memberships_by_user,
                )
            else:
                self.report.report_failure(
                    "azure_ad_group_mapping",
                    f"Failed to extract DataHub Group Name from Azure AD Group named {group.get('displayName')}. Skipping...",
                )

    # Phase 3a: only the users that belong to the groups found above.
    if (
        self.config.ingest_groups_users
        and self.config.ingest_group_membership
        and not self.config.ingest_users
    ):
        member_snapshots = self._map_azure_ad_users(self.azure_ad_groups_users)
        yield from self.ingest_ad_users(member_snapshots, memberships_by_user)

    # Phase 3b: every user, batch by batch.
    if self.config.ingest_users:
        for user_batch in self._get_azure_ad_users():
            user_snapshots = self._map_azure_ad_users(user_batch)
            yield from self.ingest_ad_users(user_snapshots, memberships_by_user)
def get_workunits(self) -> Iterable[MetadataWorkUnit]:
    """Yield work units for Okta groups and users.

    Three steps run in order, each gated by a ``self.config`` flag:

    1. ``ingest_groups`` -- one work unit per snapshot returned by
       ``_map_okta_groups``;
    2. ``ingest_group_membership`` -- builds a user-URN ->
       ``GroupMembershipClass`` mapping from the members of the groups
       fetched in step 1 (skipped when no groups were fetched);
    3. ``ingest_users`` -- one work unit per user snapshot, with the
       membership aspect from step 2 attached when one exists.

    Groups or users whose URN cannot be derived are logged, reported
    through ``self.report.report_failure`` and skipped.
    """
    # Step 1: Produce MetadataWorkUnits for CorpGroups.
    # Bound up front: step 2 reads this name even when group ingestion is
    # disabled, which previously raised UnboundLocalError.
    okta_groups = None
    if self.config.ingest_groups:
        okta_groups = list(self._get_okta_groups())
        for datahub_corp_group_snapshot in self._map_okta_groups(okta_groups):
            mce = MetadataChangeEvent(proposedSnapshot=datahub_corp_group_snapshot)
            wu = MetadataWorkUnit(id=datahub_corp_group_snapshot.urn, mce=mce)
            self.report.report_workunit(wu)
            yield wu

    # Step 2: Populate GroupMembership Aspects for CorpUsers
    datahub_corp_user_urn_to_group_membership: Dict[str, GroupMembershipClass] = {}
    if self.config.ingest_group_membership and okta_groups is not None:
        # Fetch membership for each group.
        for okta_group in okta_groups:
            datahub_corp_group_urn = self._map_okta_group_profile_to_urn(
                okta_group.profile
            )
            if datahub_corp_group_urn is None:
                error_str = f"Failed to extract DataHub Group Name from Okta Group: Invalid regex pattern provided or missing profile attribute for group named {okta_group.profile.name}. Skipping..."
                logger.error(error_str)
                self.report.report_failure("okta_group_mapping", error_str)
                continue

            # Extract and map users for each group.
            for okta_user in self._get_okta_group_users(okta_group):
                datahub_corp_user_urn = self._map_okta_user_profile_to_urn(
                    okta_user.profile
                )
                if datahub_corp_user_urn is None:
                    error_str = f"Failed to extract DataHub Username from Okta User: Invalid regex pattern provided or missing profile attribute for User with login {okta_user.profile.login}. Skipping..."
                    logger.error(error_str)
                    self.report.report_failure("okta_user_mapping", error_str)
                    continue

                # Either update or create the GroupMembership aspect for this group member.
                # TODO: Production of the GroupMembership aspect will overwrite the existing
                # group membership for the DataHub user.
                group_membership = datahub_corp_user_urn_to_group_membership.get(
                    datahub_corp_user_urn
                )
                if group_membership is None:
                    datahub_corp_user_urn_to_group_membership[
                        datahub_corp_user_urn
                    ] = GroupMembershipClass(groups=[datahub_corp_group_urn])
                else:
                    group_membership.groups.append(datahub_corp_group_urn)

    # Step 3: Produce MetadataWorkUnits for CorpUsers.
    if self.config.ingest_users:
        okta_users = self._get_okta_users()
        filtered_okta_users = filter(self._filter_okta_user, okta_users)
        for datahub_corp_user_snapshot in self._map_okta_users(filtered_okta_users):
            # Add GroupMembership aspect populated in Step 2 if applicable.
            datahub_group_membership = datahub_corp_user_urn_to_group_membership.get(
                datahub_corp_user_snapshot.urn
            )
            if datahub_group_membership is not None:
                datahub_corp_user_snapshot.aspects.append(datahub_group_membership)
            mce = MetadataChangeEvent(proposedSnapshot=datahub_corp_user_snapshot)
            wu = MetadataWorkUnit(id=datahub_corp_user_snapshot.urn, mce=mce)
            self.report.report_workunit(wu)
            yield wu
def get_workunits(self) -> Iterable[MetadataWorkUnit]:
    """Yield work units for Azure AD groups and users.

    Three steps run in order, each gated by a ``self.config`` flag:

    1. ``ingest_groups`` -- one work unit per snapshot returned by
       ``_map_azure_ad_groups``;
    2. ``ingest_group_membership`` -- builds a user-URN ->
       ``GroupMembershipClass`` mapping from the members of the groups
       fetched in step 1 (skipped when no groups were fetched);
    3. ``ingest_users`` -- one work unit per user snapshot, with the
       membership aspect from step 2 attached when one exists.

    Every batch produced by ``_get_azure_ad_groups``,
    ``_get_azure_ad_group_users`` and ``_get_azure_ad_users`` is consumed,
    not just the first one. Groups or users whose URN cannot be derived are
    reported through ``self.report.report_failure`` and skipped.
    """
    # Create MetadataWorkUnits for CorpGroups
    # Bound up front: the membership step reads this list even when group
    # ingestion is disabled, which previously raised UnboundLocalError.
    azure_ad_groups = []
    if self.config.ingest_groups:
        # Iterate the generator instead of calling next() on it: next() only
        # saw the first batch and raised on an exhausted generator.
        for azure_ad_groups_batch in self._get_azure_ad_groups():
            azure_ad_groups.extend(azure_ad_groups_batch)
            for datahub_corp_group_snapshot in self._map_azure_ad_groups(
                azure_ad_groups_batch
            ):
                mce = MetadataChangeEvent(
                    proposedSnapshot=datahub_corp_group_snapshot
                )
                wu = MetadataWorkUnit(id=datahub_corp_group_snapshot.urn, mce=mce)
                self.report.report_workunit(wu)
                yield wu

    # Populate GroupMembership Aspects for CorpUsers
    datahub_corp_user_urn_to_group_membership: Dict[str, GroupMembershipClass] = {}
    if self.config.ingest_group_membership and azure_ad_groups:
        # Fetch membership for each group
        for azure_ad_group in azure_ad_groups:
            datahub_corp_group_urn = self._map_azure_ad_group_to_urn(azure_ad_group)
            if not datahub_corp_group_urn:
                error_str = f"Failed to extract DataHub Group Name from Azure AD Group named {azure_ad_group.get('displayName')}. Skipping..."
                self.report.report_failure("azure_ad_group_mapping", error_str)
                continue

            # Extract and map users for each group, one batch at a time.
            for azure_ad_group_users in self._get_azure_ad_group_users(
                azure_ad_group
            ):
                # if the batch doesn't have any members, continue
                if not azure_ad_group_users:
                    continue
                for azure_ad_user in azure_ad_group_users:
                    datahub_corp_user_urn = self._map_azure_ad_user_to_urn(
                        azure_ad_user
                    )
                    if not datahub_corp_user_urn:
                        error_str = f"Failed to extract DataHub Username from Azure ADUser {azure_ad_user.get('displayName')}. Skipping..."
                        self.report.report_failure("azure_ad_user_mapping", error_str)
                        continue

                    # update/create the GroupMembership aspect for this group member.
                    group_membership = datahub_corp_user_urn_to_group_membership.get(
                        datahub_corp_user_urn
                    )
                    if group_membership is None:
                        datahub_corp_user_urn_to_group_membership[
                            datahub_corp_user_urn
                        ] = GroupMembershipClass(groups=[datahub_corp_group_urn])
                    else:
                        group_membership.groups.append(datahub_corp_group_urn)

    # Create MetadataWorkUnits for CorpUsers
    if self.config.ingest_users:
        for azure_ad_users in self._get_azure_ad_users():
            for datahub_corp_user_snapshot in self._map_azure_ad_users(
                azure_ad_users
            ):
                # Add GroupMembership if applicable
                datahub_group_membership = (
                    datahub_corp_user_urn_to_group_membership.get(
                        datahub_corp_user_snapshot.urn
                    )
                )
                if datahub_group_membership is not None:
                    datahub_corp_user_snapshot.aspects.append(
                        datahub_group_membership
                    )
                mce = MetadataChangeEvent(proposedSnapshot=datahub_corp_user_snapshot)
                wu = MetadataWorkUnit(id=datahub_corp_user_snapshot.urn, mce=mce)
                self.report.report_workunit(wu)
                yield wu