def __init__(self,
                 dashboard_group_id: Optional[str],
                 dashboard_id: Optional[str],
                 email: str,
                 view_count: int,
                 should_create_user_node: Optional[bool] = False,
                 product: Optional[str] = '',
                 cluster: Optional[str] = 'gold',
                 **kwargs: Any) -> None:
        """
        Store one usage fact: how many times ``email`` viewed a dashboard.

        :param dashboard_group_id: group the dashboard belongs to
        :param dashboard_id: identifier of the dashboard
        :param email: email of the viewing user; also used to build the User model
        :param view_count: number of views; coerced with ``int()``
        :param should_create_user_node: when truthy, a User node/record built from
        the email alone is emitted as well. Per the original note, other User fields
        are emptied in that case; the intended use is an anonymous user for sources
        (e.g. Mode) that do not report who viewed the dashboard.
        :param product: dashboard product name used in the dashboard key
        :param cluster: cluster name used in the dashboard key
        :param kwargs: accepted for call compatibility; not used here
        """
        # Normalise the loosely typed inputs before storing anything.
        views = int(view_count)
        create_user = bool(should_create_user_node)

        # Components of the dashboard key.
        self._product = product
        self._cluster = cluster
        self._dashboard_group_id = dashboard_group_id
        self._dashboard_id = dashboard_id

        # The viewing user and the usage figure.
        self._email = email
        self._user_model = User(email=email)
        self._should_create_user_node = create_user
        self._view_count = views

        # Iterators are created last so they observe fully initialised state.
        self._relation_iterator = self._create_relation_iterator()
        self._record_iterator = self._create_record_iterator()
Пример #2
0
 def setUp(self) -> None:
     """Create the ``User`` fixture stored on ``self.user``."""
     super(TestUser, self).setUp()
     # Collect the keyword arguments first so the fixture reads like a table.
     user_fields = {
         'first_name': 'test_first',
         'last_name': 'test_last',
         'name': 'test_first test_last',
         'email': '*****@*****.**',
         'github_username': '******',
         'team_name': 'test_team',
         'employee_type': 'FTE',
         'manager_email': '*****@*****.**',
         'slack_id': 'slack',
         'is_active': True,
         'updated_at': 1,
         'role_name': 'swe',
     }
     self.user = User(**user_fields)
Пример #3
0
 def setUp(self) -> None:
     """Create the user, table and query fixtures used by ``TestQuery``."""
     self.maxDiff = None
     super(TestQuery, self).setUp()

     user_fields = {
         'first_name': 'test_first',
         'last_name': 'test_last',
         'full_name': 'test_first test_last',
         'email': '*****@*****.**',
         'github_username': '******',
         'team_name': 'test_team',
         'employee_type': 'FTE',
         'manager_email': '*****@*****.**',
         'slack_id': 'slack',
         'is_active': True,
         'profile_url': 'https://profile',
         'updated_at': 1,
         'role_name': 'swe',
     }
     self.user = User(**user_fields)

     # Positional arguments for each ColumnMetadata of the fixture table.
     column_specs = [
         ('test_id1', 'description of test_table1', 'bigint', 0),
         ('test_id2', 'description of test_id2', 'bigint', 1),
         ('is_active', None, 'boolean', 2),
         ('source', 'description of source', 'varchar', 3),
         ('etl_created_at', 'description of etl_created_at', 'timestamp', 4),
         ('ds', None, 'varchar', 5),
     ]
     columns = [ColumnMetadata(*spec) for spec in column_specs]
     self.table_metadata = TableMetadata(
         'hive', 'gold', 'test_schema1', 'test_table1', 'test_table1', columns)

     self.sql = "select * from table"
     self.query_metadata = QueryMetadata(sql=self.sql,
                                         tables=[self.table_metadata],
                                         user=self.user)
     self._query_hash = 'da44ff72560e593a8eca9ffcee6a2696'
    def test_parse_testdata(self) -> None:
        """
        Register the bundled BambooHR directory XML as the GET response and
        check that exactly one matching ``User`` is extracted from it.
        """
        extractor = BamboohrUserExtractor()
        extractor_conf = ConfigFactory.from_dict({
            'api_key': 'api_key',
            'subdomain': 'amundsen'
        })
        extractor.init(extractor_conf)

        # The fixture is resolved relative to this test module.
        module_dir = os.path.dirname(os.path.realpath(__file__))
        fixture_path = os.path.join(
            module_dir,
            '../../../resources/extractor/user/bamboohr/testdata.xml')

        with io.open(fixture_path) as fixture:
            directory_xml = fixture.read()
        responses.add(responses.GET,
                      extractor._employee_directory_uri(),
                      body=directory_xml)

        expected_fields = {
            'email': '*****@*****.**',
            'first_name': 'Roald',
            'last_name': 'Amundsen',
            'name': 'Roald Amundsen',
            'team_name': '508 Corporate Marketing',
            'role_name': 'Antarctic Explorer',
        }
        expected_user = User(**expected_fields)

        extracted_users = list(extractor._get_extract_iter())

        self.assertEqual(1, len(extracted_users))
        # Users are compared through their repr.
        self.assertEqual(repr(expected_user), repr(extracted_users[0]))
Пример #5
0
 def _create_node_iterator(self) -> Iterator[GraphNode]:
     """
     Yield the single user node built from ``self.user_email``.
     :return: iterator producing the result of ``User.get_user_node``
     """
     yield User(email=self.user_email).get_user_node()
Пример #6
0
    def create_nodes(self) -> List[GraphNode]:
        """
        Delegate node creation to a ``User`` built from ``self.user_email``.
        :return: whatever ``User.create_nodes`` returns for that user
        """
        email_only_user = User(email=self.user_email)
        return email_only_user.create_nodes()
Пример #7
0
 def _create_node_iterator(self) -> Iterator[GraphNode]:
     """Yield an email-only ``User`` node for every non-empty owner email."""
     for owner_email in self.owner_emails:
         if not owner_email:
             # Falsy entries (None, empty string) produce no node.
             continue

         email_attribute = {
             User.USER_NODE_EMAIL: owner_email,
         }
         yield GraphNode(key=User.get_user_model_key(email=owner_email),
                         label=User.USER_NODE_LABEL,
                         attributes=email_attribute)
Пример #8
0
 def _create_record_iterator(self) -> Iterator[RDSModel]:
     """Yield the single record linking the owner's user key to the dashboard key."""
     owner_key = User.get_user_model_key(email=self._email)
     dashboard_key = DashboardMetadata.DASHBOARD_KEY_FORMAT.format(
         product=self._product,
         cluster=self._cluster,
         dashboard_group=self._dashboard_group_id,
         dashboard_name=self._dashboard_id)

     yield RDSDashboardOwner(user_rk=owner_key, dashboard_rk=dashboard_key)
Пример #9
0
    def create_nodes(self):
        # type: () -> List[Dict[str, Any]]
        """
        Delegate node creation to a ``User`` built from ``self.user_email``.
        :return: whatever ``User.create_nodes`` returns for that user
        """
        email_only_user = User(email=self.user_email)
        return email_only_user.create_nodes()
Пример #10
0
 def test_create_node_additional_attr(self):
     """An extra keyword (``enable_notify``) must show up on the created node."""
     user_fields = dict(first_name='test_first',
                        last_name='test_last',
                        name='test_first test_last',
                        email='*****@*****.**',
                        github_username='******',
                        team_name='test_team',
                        employee_type='FTE',
                        manager_email='*****@*****.**',
                        slack_id='slack',
                        is_active=True,
                        updated_at=1,
                        role_name='swe',
                        enable_notify=True)
     first_node = User(**user_fields).create_nodes()[0]

     self.assertEqual(first_node['email'], '*****@*****.**')
     self.assertEqual(first_node['role_name'], 'swe')
     self.assertTrue(first_node['enable_notify'])
Пример #11
0
 def test_create_record_additional_attr_mysql(self) -> None:
     """The first record of a fully populated user serializes its email and role."""
     user_fields = dict(first_name='test_first',
                        last_name='test_last',
                        name='test_first test_last',
                        email='*****@*****.**',
                        github_username='******',
                        team_name='test_team',
                        employee_type='FTE',
                        manager_email='*****@*****.**',
                        slack_id='slack',
                        is_active=True,
                        updated_at=1,
                        role_name='swe',
                        enable_notify=True)
     first_record = User(**user_fields).create_next_record()
     serialized = mysql_serializer.serialize_record(first_record)

     self.assertEqual(serialized['email'], '*****@*****.**')
     self.assertEqual(serialized['role_name'], 'swe')
Пример #12
0
    def _create_record_iterator(self) -> Iterator[RDSModel]:
        """Yield the user record first, then the table-usage record that refers to it."""
        yield User(email=self.user_email).get_user_record()

        yield RDSTableUsage(
            user_rk=self._get_user_key(self.user_email),
            table_rk=self._get_table_key(),
            read_count=self.read_count
        )
Пример #13
0
 def test_create_node_additional_attr_neptune(self) -> None:
     """The Neptune-converted user node exposes typed email, role and notify columns."""
     user_fields = dict(first_name='test_first',
                        last_name='test_last',
                        name='test_first test_last',
                        email='*****@*****.**',
                        github_username='******',
                        team_name='test_team',
                        employee_type='FTE',
                        manager_email='*****@*****.**',
                        slack_id='slack',
                        is_active=True,
                        updated_at=1,
                        role_name='swe',
                        enable_notify=True)
     first_node = User(**user_fields).create_nodes()[0]
     neptune_node = neptune_serializer.convert_node(first_node)

     self.assertEqual(neptune_node['email:String(single)'], '*****@*****.**')
     self.assertEqual(neptune_node['role_name:String(single)'], 'swe')
     self.assertTrue(neptune_node['enable_notify:Bool(single)'])
Пример #14
0
    def _create_record_iterator(self) -> Iterator[RDSModel]:
        """
        Yield a user record (only when an email is set), then the usage record
        that matches ``self.start_label``.

        :raises Exception: when the start label is neither a table nor a dashboard
        """
        if self.user_email:
            yield RDSUser(
                rk=User.get_user_model_key(email=self.user_email),
                email=self.user_email
            )

        resource_label = self.start_label

        if resource_label == TableMetadata.TABLE_NODE_LABEL:
            yield RDSTableUsage(user_rk=User.get_user_model_key(email=self.user_email),
                                table_rk=self.start_key,
                                read_count=self.read_count)
            return

        if resource_label == DashboardMetadata.DASHBOARD_NODE_LABEL:
            yield RDSDashboardUsage(
                user_rk=User.get_user_model_key(email=self.user_email),
                dashboard_rk=self.start_key,
                read_count=self.read_count,
            )
            return

        # No usage record type exists for any other start label.
        raise Exception(f'{resource_label} usage is not table serializable')
Пример #15
0
    def _create_atlas_owner_relation(self, owner: str) -> AtlasRelationship:
        """
        Build the Atlas ownership relationship between this resource and ``owner``.

        :param owner: email of the owning user
        :return: relationship from ``self.start_key`` (data set) to the owner's user key
        """
        owner_key = User.get_user_model_key(email=owner)

        return AtlasRelationship(
            relationshipType=AtlasRelationshipTypes.resource_owner,
            entityType1=AtlasCommonTypes.data_set,
            entityQualifiedName1=self.start_key,
            entityType2=AtlasCommonTypes.user,
            entityQualifiedName2=owner_key,
            attributes={})
Пример #16
0
 def test_create_node_additional_attr(self) -> None:
     """An extra keyword (``enable_notify``) survives neo4j node serialization."""
     user_fields = dict(first_name='test_first',
                        last_name='test_last',
                        full_name='test_first test_last',
                        email='*****@*****.**',
                        github_username='******',
                        team_name='test_team',
                        employee_type='FTE',
                        manager_email='*****@*****.**',
                        slack_id='slack',
                        is_active=True,
                        updated_at=1,
                        role_name='swe',
                        enable_notify=True)
     first_node = User(**user_fields).create_next_node()
     serialized = neo4_serializer.serialize_node(first_node)

     self.assertEqual(serialized['email'], '*****@*****.**')
     self.assertEqual(serialized['role_name'], 'swe')
     self.assertTrue(serialized['enable_notify:UNQUOTED'])
Пример #17
0
    def _create_record_iterator(self) -> Iterator[RDSModel]:
        """
        For every non-empty owner email, yield a user record followed by the
        owner record that matches ``self.start_label``.

        :raises Exception: when the start label is neither a table nor a dashboard
        """
        for owner_email in self.owner_emails:
            if not owner_email:
                # Falsy entries (None, empty string) produce no records.
                continue

            yield RDSUser(rk=User.get_user_model_key(email=owner_email),
                          email=owner_email)

            if self.start_label == TableMetadata.TABLE_NODE_LABEL:
                owner_record = RDSTableOwner(
                    table_rk=self.start_key,
                    user_rk=User.get_user_model_key(email=owner_email),
                )
            elif self.start_label == DashboardMetadata.DASHBOARD_NODE_LABEL:
                owner_record = RDSDashboardOwner(
                    dashboard_rk=self.start_key,
                    user_rk=User.get_user_model_key(email=owner_email))
            else:
                raise Exception(
                    f'{self.start_label}<>Owner relationship is not table serializable'
                )

            yield owner_record
Пример #18
0
 def _create_relation_iterator(self) -> Iterator[GraphRelationship]:
     """Yield one owner relationship from the start resource to each non-empty owner email."""
     for owner_email in self.owner_emails:
         if not owner_email:
             # Falsy entries (None, empty string) produce no relationship.
             continue

         owner_key = User.get_user_model_key(email=owner_email)
         yield GraphRelationship(
             start_label=self.start_label,
             start_key=self.start_key,
             end_label=User.USER_NODE_LABEL,
             end_key=owner_key,
             type=OWNER_RELATION_TYPE,
             reverse_type=OWNER_OF_OBJECT_RELATION_TYPE,
             attributes={})
Пример #19
0
 def _create_relation_iterator(self) -> Iterator[GraphRelationship]:
     """Yield the single relationship between the start resource and the user, carrying the read count."""
     reader_key = User.get_user_model_key(email=self.user_email)
     usage_attributes = {
         READ_RELATION_COUNT_PROPERTY: self.read_count,
     }

     yield GraphRelationship(
         start_label=self.start_label,
         start_key=self.start_key,
         end_label=User.USER_NODE_LABEL,
         end_key=reader_key,
         type=READ_REVERSE_RELATION_TYPE,
         reverse_type=READ_RELATION_TYPE,
         attributes=usage_attributes
     )
Пример #20
0
    def _create_record_iterator(self) -> Iterator[RDSModel]:
        """
        For each column reader yield a table-usage record, preceded by a user
        record when the reader's column is ``'*'``.
        """
        for reader in self.col_readers:
            reads_all_columns = reader.column == '*'
            if reads_all_columns:
                yield User(email=reader.user_email).get_user_record()

            yield RDSTableUsage(
                user_rk=self._get_user_key(reader.user_email),
                table_rk=self._get_table_key(reader),
                read_count=reader.read_count)
Пример #21
0
    def test_not_including_empty_attribute(self) -> None:
        """
        By default empty attributes are serialized with empty values; with
        ``do_not_update_empty_attribute=True`` they are left out of the node.
        """
        # Default behaviour: every attribute is present, empty ones as ''/0.
        default_user = User(email='*****@*****.**', foo='bar')
        default_serialized = neo4_serializer.serialize_node(default_user.create_next_node())
        expected_with_empties = {
            'KEY': '*****@*****.**',
            'LABEL': 'User',
            'email': '*****@*****.**',
            'is_active:UNQUOTED': True,
            'profile_url': '',
            'first_name': '',
            'last_name': '',
            'full_name': '',
            'github_username': '',
            'team_name': '',
            'employee_type': '',
            'slack_id': '',
            'role_name': '',
            'updated_at:UNQUOTED': 0,
            'foo': 'bar'
        }
        self.assertDictEqual(default_serialized, expected_with_empties)

        # Opt-out: only key, label and the non-empty attributes remain.
        sparse_user = User(email='*****@*****.**',
                           foo='bar',
                           is_active=False,
                           do_not_update_empty_attribute=True)
        sparse_serialized = neo4_serializer.serialize_node(sparse_user.create_next_node())
        expected_without_empties = {
            'KEY': '*****@*****.**',
            'LABEL': 'User',
            'email': '*****@*****.**',
            'foo': 'bar'
        }
        self.assertDictEqual(sparse_serialized, expected_without_empties)
 def _create_relation_iterator(self) -> Iterator[GraphRelationship]:
     """Yield the single dashboard-to-user relationship carrying the view count."""
     dashboard_key = DashboardMetadata.DASHBOARD_KEY_FORMAT.format(
         product=self._product,
         cluster=self._cluster,
         dashboard_group=self._dashboard_group_id,
         dashboard_name=self._dashboard_id)
     viewer_key = User.get_user_model_key(email=self._email)

     yield GraphRelationship(
         start_label=DashboardMetadata.DASHBOARD_NODE_LABEL,
         end_label=User.USER_NODE_LABEL,
         start_key=dashboard_key,
         end_key=viewer_key,
         type=READ_REVERSE_RELATION_TYPE,
         reverse_type=READ_RELATION_TYPE,
         attributes={READ_RELATION_COUNT_PROPERTY: self._view_count})
Пример #23
0
 def _create_relation_iterator(self) -> Iterator[Dict[str, Any]]:
     """Yield the single dashboard-to-user relation dict, including the view count."""
     dashboard_key = DashboardMetadata.DASHBOARD_KEY_FORMAT.format(
         product=self._product,
         cluster=self._cluster,
         dashboard_group=self._dashboard_group_id,
         dashboard_name=self._dashboard_id
     )
     viewer_key = User.get_user_model_key(email=self._email)

     usage_relation = {
         RELATION_START_LABEL: DashboardMetadata.DASHBOARD_NODE_LABEL,
         RELATION_END_LABEL: User.USER_NODE_LABEL,
         RELATION_START_KEY: dashboard_key,
         RELATION_END_KEY: viewer_key,
         RELATION_TYPE: READ_REVERSE_RELATION_TYPE,
         RELATION_REVERSE_TYPE: READ_RELATION_TYPE,
         READ_RELATION_COUNT_PROPERTY: self._view_count
     }
     yield usage_relation
    def _create_record_iterator(self) -> Iterator[RDSModel]:
        """
        Yield the user record (only when user creation is enabled and the user
        model produces one), then the dashboard-usage record.
        """
        maybe_user_record = (self._user_model.create_next_record()
                             if self._should_create_user_node else None)
        if maybe_user_record:
            yield maybe_user_record

        viewer_key = User.get_user_model_key(email=self._email)
        dashboard_key = DashboardMetadata.DASHBOARD_KEY_FORMAT.format(
            product=self._product,
            cluster=self._cluster,
            dashboard_group=self._dashboard_group_id,
            dashboard_name=self._dashboard_id)

        yield RDSDashboardUsage(user_rk=viewer_key,
                                dashboard_rk=dashboard_key,
                                read_count=self._view_count)
Пример #25
0
 def _create_relation_iterator(self):
     # type: () -> Iterator[Dict[str, Any]]
     """
     Yield the single owner relation dict between the dashboard and the user.

     :return: iterator producing one dict with start/end labels and keys plus
     the owner relation type and its reverse type
     """
     yield {
         RELATION_START_LABEL: DashboardMetadata.DASHBOARD_NODE_LABEL,
         RELATION_END_LABEL: User.USER_NODE_LABEL,
         RELATION_START_KEY: DashboardMetadata.DASHBOARD_KEY_FORMAT.format(
             product=self._product,
             cluster=self._cluster,
             dashboard_group=self._dashboard_group_id,
             dashboard_name=self._dashboard_id
         ),
         RELATION_END_KEY: User.get_user_model_key(email=self._email),
         RELATION_TYPE: OWNER_RELATION_TYPE,
         RELATION_REVERSE_TYPE: OWNER_OF_OBJECT_RELATION_TYPE
     }
Пример #26
0
    def _get_extract_iter(self) -> Iterator[User]:
        """
        Fetch the employee directory and yield one ``User`` per employee element.

        :return: iterator of ``User`` objects built from the directory XML
        :raises requests.HTTPError: when the directory request returns a 4xx/5xx status
        """
        response = requests.get(self._employee_directory_uri(),
                                auth=HTTPBasicAuth(self._api_key, 'x'))
        # Fail with a clear HTTP error instead of parsing an error body as a directory.
        response.raise_for_status()

        root = ElementTree.fromstring(response.content)

        def get_field(employee: 'ElementTree.Element', name: str) -> str:
            """Return the text of the employee's field ``name``, or '' when absent or empty."""
            field = employee.find("./field[@id='{name}']".format(name=name))
            if field is not None and field.text is not None:
                return field.text
            return ''

        for employee in root.findall('./employees/employee'):
            yield User(
                email=get_field(employee, 'workEmail'),
                first_name=get_field(employee, 'firstName'),
                last_name=get_field(employee, 'lastName'),
                name=get_field(employee, 'displayName'),
                team_name=get_field(employee, 'department'),
                role_name=get_field(employee, 'jobTitle'),
            )
class DashboardUsage(GraphSerializable, TableSerializable):
    """
    Usage (view count) of a Dashboard by a User, emitted as graph
    nodes/relationships and as table records.
    """
    def __init__(self,
                 dashboard_group_id: Optional[str],
                 dashboard_id: Optional[str],
                 email: str,
                 view_count: int,
                 should_create_user_node: Optional[bool] = False,
                 product: Optional[str] = '',
                 cluster: Optional[str] = 'gold',
                 **kwargs: Any) -> None:
        """
        Store one usage fact: how many times ``email`` viewed a dashboard.

        :param dashboard_group_id: group the dashboard belongs to
        :param dashboard_id: identifier of the dashboard
        :param email: email of the viewing user; also used to build the User model
        :param view_count: number of views; coerced with ``int()``
        :param should_create_user_node: when truthy, a User node/record built from
        the email alone is emitted as well. Per the original note, other User fields
        are emptied in that case; the intended use is an anonymous user for sources
        (e.g. Mode) that do not report who viewed the dashboard.
        :param product: dashboard product name used in the dashboard key
        :param cluster: cluster name used in the dashboard key
        :param kwargs: accepted for call compatibility; not used here
        """
        # Normalise the loosely typed inputs before storing anything.
        views = int(view_count)
        create_user = bool(should_create_user_node)

        # Components of the dashboard key.
        self._product = product
        self._cluster = cluster
        self._dashboard_group_id = dashboard_group_id
        self._dashboard_id = dashboard_id

        # The viewing user and the usage figure.
        self._email = email
        self._user_model = User(email=email)
        self._should_create_user_node = create_user
        self._view_count = views

        # Generators are lazy, so nothing below runs until the first next().
        self._relation_iterator = self._create_relation_iterator()
        self._record_iterator = self._create_record_iterator()

    def _get_dashboard_key(self) -> str:
        """Format the dashboard key from product, cluster, group and dashboard id."""
        return DashboardMetadata.DASHBOARD_KEY_FORMAT.format(
            product=self._product,
            cluster=self._cluster,
            dashboard_group=self._dashboard_group_id,
            dashboard_name=self._dashboard_id)

    def create_next_node(self) -> Union[GraphNode, None]:
        """Return the user model's next node, or None when user creation is disabled."""
        if not self._should_create_user_node:
            return None

        return self._user_model.create_next_node()

    def create_next_relation(self) -> Union[GraphRelationship, None]:
        """Return the next relationship, or None once the iterator is exhausted."""
        return next(self._relation_iterator, None)

    def _create_relation_iterator(self) -> Iterator[GraphRelationship]:
        """Yield the single dashboard-to-user relationship carrying the view count."""
        dashboard_key = self._get_dashboard_key()
        viewer_key = User.get_user_model_key(email=self._email)

        yield GraphRelationship(
            start_label=DashboardMetadata.DASHBOARD_NODE_LABEL,
            end_label=User.USER_NODE_LABEL,
            start_key=dashboard_key,
            end_key=viewer_key,
            type=READ_REVERSE_RELATION_TYPE,
            reverse_type=READ_RELATION_TYPE,
            attributes={READ_RELATION_COUNT_PROPERTY: self._view_count})

    def create_next_record(self) -> Union[RDSModel, None]:
        """Return the next table record, or None once the iterator is exhausted."""
        return next(self._record_iterator, None)

    def _create_record_iterator(self) -> Iterator[RDSModel]:
        """
        Yield the user record (only when user creation is enabled and the user
        model produces one), then the dashboard-usage record.
        """
        maybe_user_record = (self._user_model.create_next_record()
                             if self._should_create_user_node else None)
        if maybe_user_record:
            yield maybe_user_record

        viewer_key = User.get_user_model_key(email=self._email)
        yield RDSDashboardUsage(user_rk=viewer_key,
                                dashboard_rk=self._get_dashboard_key(),
                                read_count=self._view_count)

    def __repr__(self) -> str:
        """Render the stored constructor arguments in positional order."""
        return (f'DashboardUsage({self._dashboard_group_id!r}, {self._dashboard_id!r}, '
                f'{self._email!r}, {self._view_count!r}, {self._should_create_user_node!r}, '
                f'{self._product!r}, {self._cluster!r})')
Пример #28
0
class DashboardUsage(Neo4jCsvSerializable):
    """
    A model that encapsulates Dashboard usage between Dashboard and User
    """
    def __init__(
            self,
            dashboard_group_id,  # type: Optional[str]
            dashboard_id,  # type: Optional[str]
            email,  # type: str
            view_count,  # type: int
            should_create_user_node=False,  # type: Optional[bool]
            product='',  # type: Optional[str]
            cluster='gold',  # type: Optional[str]
            **kwargs):
        # type: (...) -> None
        """
        Store one usage fact: how many times ``email`` viewed a dashboard.

        :param dashboard_group_id: group the dashboard belongs to
        :param dashboard_id: identifier of the dashboard
        :param email: email of the viewing user; also used to build the User model
        :param view_count: number of views, stored as given
        :param should_create_user_node: Enable this if it is fine to create/update the User node with only
        the email address. Please be advised that other fields will be emptied. The current use case is to
        create an anonymous user. For example, Mode dashboard does not provide which user viewed the dashboard
        and an anonymous user can be used to show the usage.
        :param product: dashboard product name used in the dashboard key
        :param cluster: cluster name used in the dashboard key
        :param kwargs: accepted for call compatibility; not used here
        """
        self._dashboard_group_id = dashboard_group_id
        self._dashboard_id = dashboard_id
        self._email = email
        self._view_count = view_count
        self._product = product
        self._cluster = cluster
        self._user_model = User(email=email)
        self._should_create_user_node = bool(should_create_user_node)
        # Generator creation is lazy; nothing is formatted until the first next().
        self._relation_iterator = self._create_relation_iterator()

    def create_next_node(self):
        # type: () -> Union[Dict[str, Any], None]
        """Return the user model's next node, or None when user creation is disabled."""
        if self._should_create_user_node:
            return self._user_model.create_next_node()

        # Explicit rather than falling off the end of the function.
        return None

    def create_next_relation(self):
        # type: () -> Union[Dict[str, Any], None]
        """Return the next relation dict, or None once the iterator is exhausted."""
        try:
            return next(self._relation_iterator)
        except StopIteration:
            return None

    def _create_relation_iterator(self):
        # type: () -> Iterator[Dict[str, Any]]
        """Yield the single dashboard-to-user relation dict, including the view count."""

        yield {
            RELATION_START_LABEL:
            DashboardMetadata.DASHBOARD_NODE_LABEL,
            RELATION_END_LABEL:
            User.USER_NODE_LABEL,
            RELATION_START_KEY:
            DashboardMetadata.DASHBOARD_KEY_FORMAT.format(
                product=self._product,
                cluster=self._cluster,
                dashboard_group=self._dashboard_group_id,
                dashboard_name=self._dashboard_id),
            RELATION_END_KEY:
            User.get_user_model_key(email=self._email),
            RELATION_TYPE:
            READ_REVERSE_RELATION_TYPE,
            RELATION_REVERSE_TYPE:
            READ_RELATION_TYPE,
            READ_RELATION_COUNT_PROPERTY:
            self._view_count
        }

    def __repr__(self):
        # type: () -> str
        """Render the stored constructor arguments in positional order."""
        return 'DashboardUsage({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})'.format(
            self._dashboard_group_id, self._dashboard_id, self._email,
            self._view_count, self._should_create_user_node, self._product,
            self._cluster)
Пример #29
0
 def _get_user_key(self, email):
     # type: (str) -> str
     """Return the ``User`` model key for ``email``."""
     user_key = User.get_user_model_key(email=email)
     return user_key
Пример #30
0
 def _create_node_iterator(self) -> Iterator[GraphNode]:
     """Yield a user node for every column reader whose column is ``'*'``."""
     for reader in self.col_readers:
         if reader.column != '*':
             continue

         # Nodes are produced lazily, one per matching reader.
         yield User(email=reader.user_email).get_user_node()