Пример #1
0
    def _load_test_dataset(self):
        import datetime

        self.db.clear()

        # Add country
        with self.db.connect() as session:
            # Add a country
            us = Country(code='US', name='United States of America', alpha3='USA')
            session.add(us)

        # Add organizations
        api.add_organization(self.db, 'Example')
        api.add_domain(self.db, 'Example', 'example.com', is_top_domain=True)
        api.add_domain(self.db, 'Example', 'example.net', is_top_domain=True)

        api.add_organization(self.db, 'Bitergia')
        api.add_domain(self.db, 'Bitergia', 'bitergia.net', is_top_domain=True)
        api.add_domain(self.db, 'Bitergia', 'bitergia.com', is_top_domain=True)
        api.add_domain(self.db, 'Bitergia', 'api.bitergia.com', is_top_domain=False)
        api.add_domain(self.db, 'Bitergia', 'test.bitergia.com', is_top_domain=False)

        api.add_organization(self.db, 'Unknown')

        # Add John Smith identity
        jsmith_uuid = api.add_identity(self.db, 'scm', '*****@*****.**',
                                       'John Smith', 'jsmith')
        api.add_identity(self.db, 'scm', '*****@*****.**', 'John Smith',
                         uuid=jsmith_uuid)
        api.edit_profile(self.db, jsmith_uuid, email='*****@*****.**', is_bot=True)

        # Add Joe Roe identity
        jroe_uuid = api.add_identity(self.db, 'scm', '*****@*****.**',
                                     'Jane Roe', 'jroe')
        api.add_identity(self.db, 'scm', '*****@*****.**',
                         uuid=jroe_uuid)
        api.add_identity(self.db, 'unknown', '*****@*****.**',
                         uuid=jroe_uuid)
        api.edit_profile(self.db, jroe_uuid, name='Jane Roe', email='*****@*****.**',
                         is_bot=False, country_code='US')

        # Add unique identity, this one won't have neither identities
        # nor enrollments
        api.add_unique_identity(self.db,
                                '0000000000000000000000000000000000000000')

        # Add enrollments
        api.add_enrollment(self.db, jsmith_uuid, 'Example')

        api.add_enrollment(self.db, jroe_uuid, 'Example')
        api.add_enrollment(self.db, jroe_uuid, 'Bitergia',
                           datetime.datetime(1999, 1, 1),
                           datetime.datetime(2000, 1, 1))
        api.add_enrollment(self.db, jroe_uuid, 'Bitergia',
                           datetime.datetime(2006, 1, 1),
                           datetime.datetime(2008, 1, 1))

        # Add blacklist
        api.add_to_matching_blacklist(self.db, '*****@*****.**')
        api.add_to_matching_blacklist(self.db, 'John Smith')
Пример #2
0
    def load_test_dataset(self):
        # Add country
        with self.db.connect() as session:
            # Add a country
            us = Country(code='US', name='United States of America', alpha3='USA')
            session.add(us)

        api.add_unique_identity(self.db, 'John Smith')
        api.add_identity(self.db, 'scm', '*****@*****.**',
                         uuid='John Smith')
        api.add_identity(self.db, 'scm', '*****@*****.**', 'John Smith',
                         uuid='John Smith')
        api.edit_profile(self.db, 'John Smith', name='John Smith', is_bot=False)

        api.add_unique_identity(self.db, 'John Doe')
        api.add_identity(self.db, 'scm', '*****@*****.**',
                         uuid='John Doe')
        api.edit_profile(self.db, 'John Doe', email='*****@*****.**', is_bot=True,
                         country_code='US')

        api.add_organization(self.db, 'Example')
        api.add_enrollment(self.db, 'John Smith', 'Example')
        api.add_enrollment(self.db, 'John Doe', 'Example')

        api.add_organization(self.db, 'Bitergia')
        api.add_enrollment(self.db, 'John Smith', 'Bitergia')
        api.add_enrollment(self.db, 'John Doe', 'Bitergia',
                           datetime.datetime(1999, 1, 1),
                           datetime.datetime(2000, 1, 1))

        api.add_organization(self.db, 'LibreSoft')
Пример #3
0
    def test_valid_identities_already_exist(self):
        """Check method when an identity already exists but with distinct UUID"""

        # The identity already exists but with a different UUID
        uuid = api.add_identity(self.db, 'unknown', email='*****@*****.**')
        api.add_identity(self.db, source='scm', email='*****@*****.**',
                         name='John Smith', username='******', uuid=uuid)
        api.edit_profile(self.db, uuid, name='John Smith', is_bot=False,
                         country_code='US')

        parser = self.get_parser('data/sortinghat_valid.json')

        code = self.cmd.import_identities(parser)
        self.assertEqual(code, CMD_SUCCESS)

        # Check the contents of the registry
        uids = api.unique_identities(self.db)
        self.assertEqual(len(uids), 2)

        # John Smith
        uid = uids[0]
        self.assertEqual(uid.uuid, '23fe3a011190a27a7c5cf6f8925de38ff0994d8d')

        # The profile was not updated because it was already available
        prf = uid.profile
        self.assertEqual(prf.uuid, '23fe3a011190a27a7c5cf6f8925de38ff0994d8d')
        self.assertEqual(prf.name, 'John Smith')
        self.assertEqual(prf.email, None)
        self.assertEqual(prf.is_bot, False)
        self.assertEqual(prf.country_code, 'US')
        self.assertEqual(prf.country.code, 'US')
        self.assertEqual(prf.country.name, 'United States of America')

        ids = self.sort_identities(uid.identities)
        self.assertEqual(len(ids), 3)

        id0 = ids[0]
        self.assertEqual(id0.id, '03e12d00e37fd45593c49a5a5a1652deca4cf302')
        self.assertEqual(id0.name, 'John Smith')
        self.assertEqual(id0.email, '*****@*****.**')
        self.assertEqual(id0.username, 'jsmith')
        self.assertEqual(id0.source, 'scm')

        id1 = ids[1]
        self.assertEqual(id1.id, '23fe3a011190a27a7c5cf6f8925de38ff0994d8d')
        self.assertEqual(id1.name, None)
        self.assertEqual(id1.email, '*****@*****.**')
        self.assertEqual(id1.username, None)
        self.assertEqual(id1.source, 'unknown')

        id2 = ids[2]
        self.assertEqual(id2.id, '75d95d6c8492fd36d24a18bd45d62161e05fbc97')
        self.assertEqual(id2.name, 'John Smith')
        self.assertEqual(id2.email, '*****@*****.**')
        self.assertEqual(id2.username, None)
        self.assertEqual(id2.source, 'scm')
Пример #4
0
    def test_valid_identities_already_exist(self):
        """Check method when an identity already exists but with distinct UUID"""

        # The identity already exists but with a different UUID
        uuid = api.add_identity(self.db, 'unknown', email='*****@*****.**')
        api.add_identity(self.db, source='scm', email='*****@*****.**',
                         name='John Smith', username='******', uuid=uuid)
        api.edit_profile(self.db, uuid, name='John Smith', is_bot=False,
                         country_code='US')

        parser = self.get_parser(datadir('sortinghat_valid.json'))

        code = self.cmd.import_identities(parser)
        self.assertEqual(code, CMD_SUCCESS)

        # Check the contents of the registry
        uids = api.unique_identities(self.db)
        self.assertEqual(len(uids), 2)

        # John Smith
        uid = uids[1]
        self.assertEqual(uid.uuid, '2371a34a0ac65fbd9d631464ee41d583ec0e1e88')

        # The profile is updated because a new one was given
        prf = uid.profile
        self.assertEqual(prf.uuid, '2371a34a0ac65fbd9d631464ee41d583ec0e1e88')
        self.assertEqual(prf.name, None)
        self.assertEqual(prf.email, '*****@*****.**')
        self.assertEqual(prf.gender, 'male')
        self.assertEqual(prf.gender_acc, 100)
        self.assertEqual(prf.is_bot, True)
        self.assertEqual(prf.country, None)

        ids = self.sort_identities(uid.identities)
        self.assertEqual(len(ids), 3)

        id0 = ids[0]
        self.assertEqual(id0.id, '2371a34a0ac65fbd9d631464ee41d583ec0e1e88')
        self.assertEqual(id0.name, None)
        self.assertEqual(id0.email, '*****@*****.**')
        self.assertEqual(id0.username, None)
        self.assertEqual(id0.source, 'unknown')

        id1 = ids[1]
        self.assertEqual(id1.id, '880b3dfcb3a08712e5831bddc3dfe81fc5d7b331')
        self.assertEqual(id1.name, 'John Smith')
        self.assertEqual(id1.email, '*****@*****.**')
        self.assertEqual(id1.username, None)
        self.assertEqual(id1.source, 'scm')

        id2 = ids[2]
        self.assertEqual(id2.id, 'a9b403e150dd4af8953a52a4bb841051e4b705d9')
        self.assertEqual(id2.name, 'John Smith')
        self.assertEqual(id2.email, '*****@*****.**')
        self.assertEqual(id2.username, 'jsmith')
        self.assertEqual(id2.source, 'scm')
    def test_valid_identities_already_exist(self):
        """Check method when an identity already exists but with distinct UUID"""

        # The identity already exists but with a different UUID
        uuid = api.add_identity(self.db, 'unknown', email='*****@*****.**')
        api.add_identity(self.db, source='scm', email='*****@*****.**',
                         name='John Smith', username='******', uuid=uuid)
        api.edit_profile(self.db, uuid, name='John Smith', is_bot=False,
                         country_code='US')

        parser = self.get_parser('data/sortinghat_valid.json')

        code = self.cmd.import_identities(parser)
        self.assertEqual(code, CMD_SUCCESS)

        # Check the contents of the registry
        uids = api.unique_identities(self.db)
        self.assertEqual(len(uids), 2)

        # John Smith
        uid = uids[1]
        self.assertEqual(uid.uuid, '2371a34a0ac65fbd9d631464ee41d583ec0e1e88')

        # The profile is updated because a new one was given
        prf = uid.profile
        self.assertEqual(prf.uuid, '2371a34a0ac65fbd9d631464ee41d583ec0e1e88')
        self.assertEqual(prf.name, None)
        self.assertEqual(prf.email, '*****@*****.**')
        self.assertEqual(prf.is_bot, True)
        self.assertEqual(prf.country, None)

        ids = self.sort_identities(uid.identities)
        self.assertEqual(len(ids), 3)

        id0 = ids[0]
        self.assertEqual(id0.id, '2371a34a0ac65fbd9d631464ee41d583ec0e1e88')
        self.assertEqual(id0.name, None)
        self.assertEqual(id0.email, '*****@*****.**')
        self.assertEqual(id0.username, None)
        self.assertEqual(id0.source, 'unknown')

        id1 = ids[1]
        self.assertEqual(id1.id, '880b3dfcb3a08712e5831bddc3dfe81fc5d7b331')
        self.assertEqual(id1.name, 'John Smith')
        self.assertEqual(id1.email, '*****@*****.**')
        self.assertEqual(id1.username, None)
        self.assertEqual(id1.source, 'scm')

        id2 = ids[2]
        self.assertEqual(id2.id, 'a9b403e150dd4af8953a52a4bb841051e4b705d9')
        self.assertEqual(id2.name, 'John Smith')
        self.assertEqual(id2.email, '*****@*****.**')
        self.assertEqual(id2.username, 'jsmith')
        self.assertEqual(id2.source, 'scm')
Пример #6
0
    def run(self):
        if self.unify:
            for algo in self.conf['sh_matching']:
                kwargs = {'matching': algo, 'fast_matching': True}
                logger.info(
                    "[sortinghat] Unifying identities using algorithm %s",
                    kwargs['matching'])
                code = Unify(**self.sh_kwargs).unify(**kwargs)
                if code != CMD_SUCCESS:
                    logger.error("[sortinghat] Error in unify %s", kwargs)

        if self.affiliate:
            # Global enrollments using domains
            logger.info("[sortinghat] Executing affiliate")
            code = Affiliate(**self.sh_kwargs).affiliate()
            if code != CMD_SUCCESS:
                logger.error("[sortinghat] Error in affiliate %s", kwargs)

        if self.autoprofile:
            if not 'sh_autoprofile' in self.conf:
                logger.info(
                    "[sortinghat] Autoprofile not configured. Skipping.")
            else:
                logger.info("[sortinghat] Executing autoprofile: %s",
                            self.conf['sh_autoprofile'])
                sources = self.conf['sh_autoprofile']
                code = AutoProfile(**self.sh_kwargs).autocomplete(sources)
                if code != CMD_SUCCESS:
                    logger.error("Error in autoprofile %s", kwargs)

        if self.bots:
            if not 'sh_bots_names' in self.conf:
                logger.info(
                    "[sortinghat] Bots name list not configured. Skipping.")
            else:
                logger.info("[sortinghat] Marking bots: %s",
                            self.conf['sh_bots_names'])
                for name in self.conf['sh_bots_names']:
                    # First we need the uuids for the profile name
                    uuids = self.__get_uuids_from_profile_name(name)
                    # Then we can modify the profile setting bot flag
                    profile = {"is_bot": True}
                    for uuid in uuids:
                        api.edit_profile(self.db, uuid, **profile)
                # For quitting the bot flag - debug feature
                if 'sh_no_bots_names' in self.conf:
                    logger.info("[sortinghat] Removing Marking bots: %s",
                                self.conf['sh_no_bots_names'])
                    for name in self.conf['sh_no_bots_names']:
                        uuids = self.__get_uuids_from_profile_name(name)
                        profile = {"is_bot": False}
                        for uuid in uuids:
                            api.edit_profile(self.db, uuid, **profile)
Пример #7
0
    def add_identity(cls, db, identity, backend):
        """ Load and identity list from backend in Sorting Hat """
        uuid = None

        try:
            uuid = api.add_identity(db, backend, identity['email'],
                                    identity['name'], identity['username'])

            logger.debug("New sortinghat identity %s %s,%s,%s ", uuid,
                         identity['username'], identity['name'],
                         identity['email'])

            profile = {
                "name":
                identity['name'] if identity['name'] else identity['username'],
                "email":
                identity['email']
            }

            api.edit_profile(db, uuid, **profile)

        except AlreadyExistsError as ex:
            uuid = ex.eid
        except InvalidValueError as ex:
            logger.warning("Trying to add a None identity. Ignoring it.")
        except UnicodeEncodeError as ex:
            logger.warning("UnicodeEncodeError. Ignoring it. %s %s %s",
                           identity['email'], identity['name'],
                           identity['username'])
        except Exception as ex:
            logger.warning(
                "Unknown exception adding identity. Ignoring it. %s %s %s",
                identity['email'],
                identity['name'],
                identity['username'],
                exc_info=True)

        if 'company' in identity and identity['company'] is not None:
            try:
                api.add_organization(db, identity['company'])
                api.add_enrollment(db, uuid, identity['company'],
                                   datetime(1900, 1, 1), datetime(2100, 1, 1))
            except AlreadyExistsError:
                pass

        return uuid
Пример #8
0
    def add_identity(cls, db, identity, backend):
        """ Load and identity list from backend in Sorting Hat """
        uuid = None

        try:
            uuid = api.add_identity(db, backend, identity['email'],
                                    identity['name'], identity['username'])

            logger.debug("New sortinghat identity %s %s,%s,%s ",
                         uuid, identity['username'], identity['name'], identity['email'])

            profile = {"name": identity['name'] if identity['name'] else identity['username'],
                       "email": identity['email']}

            api.edit_profile(db, uuid, **profile)

        except AlreadyExistsError as ex:
            uuid = ex.uuid
        except WrappedValueError as ex:
            logger.warning("Trying to add a None identity. Ignoring it.")
        except UnicodeEncodeError as ex:
            logger.warning("UnicodeEncodeError. Ignoring it. %s %s %s",
                           identity['email'], identity['name'],
                           identity['username'])
        except Exception as ex:
            logger.warning("Unknown exception adding identity. Ignoring it. %s %s %s",
                           identity['email'], identity['name'],
                           identity['username'])
            traceback.print_exc()

        if 'company' in identity and identity['company'] is not None:
            try:
                api.add_organization(db, identity['company'])
                api.add_enrollment(db, uuid, identity['company'],
                                   datetime(1900, 1, 1),
                                   datetime(2100, 1, 1))
            except AlreadyExistsError:
                pass

        return uuid
Пример #9
0
    def load_test_dataset(self):
        # Add identities
        jroe_uuid = api.add_identity(self.db, 'scm', '*****@*****.**',
                                     'Jane', 'jroe')
        api.edit_profile(self.db, jroe_uuid, name="Jane Roe")

        jrae_uuid = api.add_identity(self.db, 'scm', '*****@*****.**',
                                     'Jane', 'jrae')
        api.edit_profile(self.db, jrae_uuid, name="Jane R", gender="unknown")

        jsmith_uuid = api.add_identity(self.db, 'mls', '*****@*****.**',
                                       'John Smith', 'jsmith')
        api.edit_profile(self.db, jsmith_uuid, name="John Smith")

        jdoe_uuid = api.add_identity(self.db, 'scm', '*****@*****.**',
                                     'John D', 'jdoe')
        api.edit_profile(self.db, jdoe_uuid, name="John D")
Пример #10
0
    def test_valid_identities_with_default_matching(self):
        """Check insertion, matching and merging of valid data"""

        # First, insert the identity that will match with one
        # from the file
        api.add_organization(self.db, 'Example')
        uuid = api.add_identity(self.db, 'unknown', email='*****@*****.**')
        api.add_enrollment(self.db, uuid, 'Example',
                           datetime.datetime(2000, 1, 1, 0, 0),
                           datetime.datetime(2100, 1, 1, 0, 0))
        api.edit_profile(self.db,
                         uuid,
                         name='John Smith',
                         is_bot=False,
                         country_code='US')

        parser = self.get_parser(datadir('sortinghat_valid.json'))

        code = self.cmd.import_identities(parser, matching='default')
        self.assertEqual(code, CMD_SUCCESS)

        # Check the contents of the registry
        uids = api.unique_identities(self.db)
        self.assertEqual(len(uids), 2)

        # Jane Roe
        uid = uids[0]
        self.assertEqual(uid.uuid, '17ab00ed3825ec2f50483e33c88df223264182ba')

        prf = uid.profile
        self.assertEqual(prf.uuid, '17ab00ed3825ec2f50483e33c88df223264182ba')
        self.assertEqual(prf.name, 'Jane Roe')
        self.assertEqual(prf.email, '*****@*****.**')
        self.assertEqual(prf.gender, None)
        self.assertEqual(prf.gender_acc, None)
        self.assertEqual(prf.is_bot, False)
        self.assertEqual(prf.country_code, 'US')
        self.assertEqual(prf.country.alpha3, 'USA')
        self.assertEqual(prf.country.code, 'US')
        self.assertEqual(prf.country.name, 'United States of America')

        ids = self.sort_identities(uid.identities)
        self.assertEqual(len(ids), 3)

        id0 = ids[0]
        self.assertEqual(id0.id, '17ab00ed3825ec2f50483e33c88df223264182ba')
        self.assertEqual(id0.name, 'Jane Roe')
        self.assertEqual(id0.email, '*****@*****.**')
        self.assertEqual(id0.username, 'jroe')
        self.assertEqual(id0.source, 'scm')

        id1 = ids[1]
        self.assertEqual(id1.id, '22d1b20763c6f5822bdda8508957486c547bb9de')
        self.assertEqual(id1.name, None)
        self.assertEqual(id1.email, '*****@*****.**')
        self.assertEqual(id1.username, None)
        self.assertEqual(id1.source, 'unknown')

        id2 = ids[2]
        self.assertEqual(id2.id, '322397ed782a798ffd9d0bc7e293df4292fe075d')
        self.assertEqual(id2.name, None)
        self.assertEqual(id2.email, '*****@*****.**')
        self.assertEqual(id2.username, None)
        self.assertEqual(id2.source, 'scm')

        enrollments = api.enrollments(self.db, uid.uuid)
        self.assertEqual(len(enrollments), 3)

        # John Smith
        uid = uids[1]
        self.assertEqual(uid.uuid, '2371a34a0ac65fbd9d631464ee41d583ec0e1e88')

        ids = self.sort_identities(uid.identities)
        self.assertEqual(len(ids), 3)

        # The profile was merged
        prf = uid.profile
        self.assertEqual(prf.uuid, '2371a34a0ac65fbd9d631464ee41d583ec0e1e88')
        self.assertEqual(prf.name, 'John Smith')
        self.assertEqual(prf.email, '*****@*****.**')
        self.assertEqual(prf.gender, 'male')
        self.assertEqual(prf.gender_acc, 100)
        self.assertEqual(prf.is_bot, True)
        self.assertEqual(prf.country_code, 'US')
        self.assertEqual(prf.country.code, 'US')
        self.assertEqual(prf.country.name, 'United States of America')

        id0 = ids[0]
        self.assertEqual(id0.id, '2371a34a0ac65fbd9d631464ee41d583ec0e1e88')
        self.assertEqual(id0.name, None)
        self.assertEqual(id0.email, '*****@*****.**')
        self.assertEqual(id0.username, None)
        self.assertEqual(id0.source, 'unknown')

        id1 = ids[1]
        self.assertEqual(id1.id, '880b3dfcb3a08712e5831bddc3dfe81fc5d7b331')
        self.assertEqual(id1.name, 'John Smith')
        self.assertEqual(id1.email, '*****@*****.**')
        self.assertEqual(id1.username, None)
        self.assertEqual(id1.source, 'scm')

        id2 = ids[2]
        self.assertEqual(id2.id, 'a9b403e150dd4af8953a52a4bb841051e4b705d9')
        self.assertEqual(id2.name, 'John Smith')
        self.assertEqual(id2.email, '*****@*****.**')
        self.assertEqual(id2.username, 'jsmith')
        self.assertEqual(id2.source, 'scm')

        # Enrollments were merged
        enrollments = api.enrollments(self.db, uid.uuid)
        self.assertEqual(len(enrollments), 1)

        rol0 = enrollments[0]
        self.assertEqual(rol0.organization.name, 'Example')
        self.assertEqual(rol0.start, datetime.datetime(2000, 1, 1, 0, 0))
        self.assertEqual(rol0.end, datetime.datetime(2100, 1, 1, 0, 0))
Пример #11
0
    def add_identities(cls, db, identities, backend):
        """ Load identities list from backend in Sorting Hat """

        merge_identities = False

        logger.info("Adding the identities to SortingHat")
        if not merge_identities:
            logger.info("Not doing identities merge")

        total = 0
        lidentities = len(identities)

        if merge_identities:
            merged_identities = []  # old identities merged into new ones
            blacklist = api.blacklist(db)
            matching = 'email-name'  # Not active
            matcher = create_identity_matcher(matching, blacklist)

        for identity in identities:
            try:
                uuid = api.add_identity(db, backend, identity['email'],
                                        identity['name'], identity['username'])

                logger.debug("New sortinghat identity %s %s,%s,%s (%i/%i)",
                             uuid, identity['username'], identity['name'],
                             identity['email'], total, lidentities)

                profile = {
                    "name":
                    identity['name']
                    if identity['name'] else identity['username'],
                    "email":
                    identity['email']
                }

                api.edit_profile(db, uuid, **profile)

                total += 1
                if not merge_identities:
                    continue  # Don't do the merge here. Too slow in large projects

                # Time to  merge
                matches = api.match_identities(db, uuid, matcher)

                if len(matches) > 1:
                    u = api.unique_identities(db, uuid)[0]
                    for m in matches:
                        # First add the old uuid to the list of changed by merge uuids
                        if m.uuid not in merged_identities:
                            merged_identities.append(m.uuid)
                        if m.uuid == uuid:
                            continue
                        # Merge matched identity into added identity
                        api.merge_unique_identities(db, m.uuid, u.uuid)
                        # uuid = m.uuid
                        # u = api.unique_identities(db, uuid, backend)[0]
                        # Include all identities related to this uuid
                        # merged_identities.append(m.uuid)

            except AlreadyExistsError as ex:
                uuid = ex.uuid
                continue
            except WrappedValueError as ex:
                logging.warning("Trying to add a None identity. Ignoring it.")
                continue
            except UnicodeEncodeError as ex:
                logging.warning("UnicodeEncodeError. Ignoring it. %s %s %s" % \
                                (identity['email'], identity['name'],
                                identity['username']))
                continue
            except Exception as ex:
                logging.warning("Unknown exception adding identity. Ignoring it. %s %s %s" % \
                                (identity['email'], identity['name'],
                                identity['username']))
                traceback.print_exc()
                continue

            if 'company' in identity and identity['company'] is not None:
                try:
                    api.add_organization(db, identity['company'])
                    api.add_enrollment(db, uuid, identity['company'],
                                       datetime(1900, 1, 1),
                                       datetime(2100, 1, 1))
                except AlreadyExistsError:
                    pass

        logger.info("Total NEW identities: %i" % (total))

        if merge_identities:
            logger.info("Total NEW identities merged: %i" % \
                        (len(merged_identities)))
            return merged_identities
        else:
            return []
Пример #12
0
    def execute(self):

        # ** START SYNC LOGIC **
        # Check that enrichment tasks are not active before loading identities
        while True:
            time.sleep(1)  # check each second if the task could start
            with TasksManager.IDENTITIES_TASKS_ON_LOCK:
                with TasksManager.NUMBER_ENRICH_TASKS_ON_LOCK:
                    enrich_tasks = TasksManager.NUMBER_ENRICH_TASKS_ON
                    logger.debug("[unify] Enrich tasks active: %i",
                                 enrich_tasks)
                    if enrich_tasks == 0:
                        # The load of identities can be started
                        TasksManager.IDENTITIES_TASKS_ON = True
                        break
        #  ** END SYNC LOGIC **

        cfg = self.config.get_conf()

        uuids_refresh = []

        for algo in cfg['sortinghat']['matching']:
            if not algo:
                # cfg['sortinghat']['matching'] is an empty list
                logger.debug('Unify not executed because empty algorithm')
                continue
            kwargs = {
                'matching': algo,
                'fast_matching': True,
                'strict_mapping': cfg['sortinghat']['strict_mapping']
            }
            logger.info("[sortinghat] Unifying identities using algorithm %s",
                        kwargs['matching'])
            self.do_unify(kwargs)

        if not cfg['sortinghat']['affiliate']:
            logger.debug("Not doing affiliation")
        else:
            # Global enrollments using domains
            logger.info("[sortinghat] Executing affiliate")
            self.do_affiliate()

        if 'autoprofile' not in cfg['sortinghat'] or \
                not cfg['sortinghat']['autoprofile'][0]:
            logger.info("[sortinghat] Autoprofile not configured. Skipping.")
        else:
            logger.info("[sortinghat] Executing autoprofile for sources: %s",
                        cfg['sortinghat']['autoprofile'])
            sources = cfg['sortinghat']['autoprofile']
            self.do_autoprofile(sources)

        if 'autogender' not in cfg['sortinghat'] or \
                not cfg['sortinghat']['autogender']:
            logger.info("[sortinghat] Autogender not configured. Skipping.")
        else:
            logger.info("[sortinghat] Executing autogender")
            self.do_autogender()

        if 'bots_names' not in cfg['sortinghat']:
            logger.info(
                "[sortinghat] Bots name list not configured. Skipping.")
        else:
            logger.info("[sortinghat] Marking bots: %s",
                        cfg['sortinghat']['bots_names'])
            for name in cfg['sortinghat']['bots_names']:
                # First we need the uuids for the profile name
                uuids = self.__get_uuids_from_profile_name(name)
                # Then we can modify the profile setting bot flag
                profile = {"is_bot": True}
                for uuid in uuids:
                    api.edit_profile(self.db, uuid, **profile)
            # For quitting the bot flag - debug feature
            if 'no_bots_names' in cfg['sortinghat']:
                logger.info("[sortinghat] Removing Marking bots: %s",
                            cfg['sortinghat']['no_bots_names'])
                for name in cfg['sortinghat']['no_bots_names']:
                    uuids = self.__get_uuids_from_profile_name(name)
                    profile = {"is_bot": False}
                    for uuid in uuids:
                        api.edit_profile(self.db, uuid, **profile)

        with TasksManager.IDENTITIES_TASKS_ON_LOCK:
            TasksManager.IDENTITIES_TASKS_ON = False
Пример #13
0
    def test_valid_identities_already_exist(self):
        """Check method when an identity already exists but with distinct UUID"""

        # The identity already exists but with a different UUID
        uuid = api.add_identity(self.db, 'unknown', email='*****@*****.**')
        api.add_identity(self.db,
                         source='scm',
                         email='*****@*****.**',
                         name='John Smith',
                         username='******',
                         uuid=uuid)
        api.edit_profile(self.db,
                         uuid,
                         name='John Smith',
                         is_bot=False,
                         country_code='US')

        parser = self.get_parser('data/sortinghat_valid.json')

        code = self.cmd.import_identities(parser)
        self.assertEqual(code, CMD_SUCCESS)

        # Check the contents of the registry
        uids = api.unique_identities(self.db)
        self.assertEqual(len(uids), 2)

        # John Smith
        uid = uids[0]
        self.assertEqual(uid.uuid, '23fe3a011190a27a7c5cf6f8925de38ff0994d8d')

        # The profile was not updated because it was already available
        prf = uid.profile
        self.assertEqual(prf.uuid, '23fe3a011190a27a7c5cf6f8925de38ff0994d8d')
        self.assertEqual(prf.name, 'John Smith')
        self.assertEqual(prf.email, None)
        self.assertEqual(prf.is_bot, False)
        self.assertEqual(prf.country_code, 'US')
        self.assertEqual(prf.country.code, 'US')
        self.assertEqual(prf.country.name, 'United States of America')

        ids = self.sort_identities(uid.identities)
        self.assertEqual(len(ids), 3)

        id0 = ids[0]
        self.assertEqual(id0.id, '03e12d00e37fd45593c49a5a5a1652deca4cf302')
        self.assertEqual(id0.name, 'John Smith')
        self.assertEqual(id0.email, '*****@*****.**')
        self.assertEqual(id0.username, 'jsmith')
        self.assertEqual(id0.source, 'scm')

        id1 = ids[1]
        self.assertEqual(id1.id, '23fe3a011190a27a7c5cf6f8925de38ff0994d8d')
        self.assertEqual(id1.name, None)
        self.assertEqual(id1.email, '*****@*****.**')
        self.assertEqual(id1.username, None)
        self.assertEqual(id1.source, 'unknown')

        id2 = ids[2]
        self.assertEqual(id2.id, '75d95d6c8492fd36d24a18bd45d62161e05fbc97')
        self.assertEqual(id2.name, 'John Smith')
        self.assertEqual(id2.email, '*****@*****.**')
        self.assertEqual(id2.username, None)
        self.assertEqual(id2.source, 'scm')
Пример #14
0
    def test_valid_identities_with_default_matching(self):
        """Check insertion, matching and merging of valid data"""

        # First, insert the identity that will match with one
        # from the file
        api.add_organization(self.db, 'Example')
        uuid = api.add_identity(self.db, 'unknown', email='*****@*****.**')
        api.add_enrollment(self.db, uuid, 'Example',
                           datetime.datetime(2000, 1, 1, 0, 0),
                           datetime.datetime(2100, 1, 1, 0, 0))
        api.edit_profile(self.db,
                         uuid,
                         name='John Smith',
                         is_bot=False,
                         country_code='US')

        parser = self.get_parser('data/sortinghat_valid.json')

        code = self.cmd.import_identities(parser, matching='default')
        self.assertEqual(code, CMD_SUCCESS)

        # Check the contents of the registry
        uids = api.unique_identities(self.db)
        self.assertEqual(len(uids), 2)

        # John Smith
        uid = uids[0]
        self.assertEqual(uid.uuid, '23fe3a011190a27a7c5cf6f8925de38ff0994d8d')

        ids = self.sort_identities(uid.identities)
        self.assertEqual(len(ids), 3)

        # The profile was merged
        prf = uid.profile
        self.assertEqual(prf.uuid, '23fe3a011190a27a7c5cf6f8925de38ff0994d8d')
        self.assertEqual(prf.name, 'John Smith')
        self.assertEqual(prf.email, '*****@*****.**')
        self.assertEqual(prf.is_bot, True)
        self.assertEqual(prf.country_code, 'US')
        self.assertEqual(prf.country.code, 'US')
        self.assertEqual(prf.country.name, 'United States of America')

        id0 = ids[0]
        self.assertEqual(id0.id, '03e12d00e37fd45593c49a5a5a1652deca4cf302')
        self.assertEqual(id0.name, 'John Smith')
        self.assertEqual(id0.email, '*****@*****.**')
        self.assertEqual(id0.username, 'jsmith')
        self.assertEqual(id0.source, 'scm')

        id1 = ids[1]
        self.assertEqual(id1.id, '23fe3a011190a27a7c5cf6f8925de38ff0994d8d')
        self.assertEqual(id1.name, None)
        self.assertEqual(id1.email, '*****@*****.**')
        self.assertEqual(id1.username, None)
        self.assertEqual(id1.source, 'unknown')

        id2 = ids[2]
        self.assertEqual(id2.id, '75d95d6c8492fd36d24a18bd45d62161e05fbc97')
        self.assertEqual(id2.name, 'John Smith')
        self.assertEqual(id2.email, '*****@*****.**')
        self.assertEqual(id2.username, None)
        self.assertEqual(id2.source, 'scm')

        # Enrollments were merged
        enrollments = api.enrollments(self.db, uid.uuid)
        self.assertEqual(len(enrollments), 1)

        rol0 = enrollments[0]
        self.assertEqual(rol0.organization.name, 'Example')
        self.assertEqual(rol0.start, datetime.datetime(2000, 1, 1, 0, 0))
        self.assertEqual(rol0.end, datetime.datetime(2100, 1, 1, 0, 0))

        # Jane Roe
        uid = uids[1]
        self.assertEqual(uid.uuid, '52e0aa0a14826627e633fd15332988686b730ab3')

        prf = uid.profile
        self.assertEqual(prf.uuid, '52e0aa0a14826627e633fd15332988686b730ab3')
        self.assertEqual(prf.name, 'Jane Roe')
        self.assertEqual(prf.email, '*****@*****.**')
        self.assertEqual(prf.is_bot, False)
        self.assertEqual(prf.country_code, 'US')
        self.assertEqual(prf.country.alpha3, 'USA')
        self.assertEqual(prf.country.code, 'US')
        self.assertEqual(prf.country.name, 'United States of America')

        ids = self.sort_identities(uid.identities)
        self.assertEqual(len(ids), 3)

        id0 = ids[0]
        self.assertEqual(id0.id, '52e0aa0a14826627e633fd15332988686b730ab3')
        self.assertEqual(id0.name, 'Jane Roe')
        self.assertEqual(id0.email, '*****@*****.**')
        self.assertEqual(id0.username, 'jroe')
        self.assertEqual(id0.source, 'scm')

        id1 = ids[1]
        self.assertEqual(id1.id, 'cbfb7bd31d556322c640f5bc7b31d58a12b15904')
        self.assertEqual(id1.name, None)
        self.assertEqual(id1.email, '*****@*****.**')
        self.assertEqual(id1.username, None)
        self.assertEqual(id1.source, 'unknown')

        id2 = ids[2]
        self.assertEqual(id2.id, 'fef873c50a48cfc057f7aa19f423f81889a8907f')
        self.assertEqual(id2.name, None)
        self.assertEqual(id2.email, '*****@*****.**')
        self.assertEqual(id2.username, None)
        self.assertEqual(id2.source, 'scm')

        enrollments = api.enrollments(self.db, uid.uuid)
        self.assertEqual(len(enrollments), 3)
Пример #15
0
    def add_identities(cls, db, identities, backend):
        """ Load identities list from backend in Sorting Hat """

        merge_identities = False

        logger.info("Adding the identities to SortingHat")
        if not merge_identities:
            logger.info("Not doing identities merge")

        total = 0
        lidentities = len(identities)


        if merge_identities:
            merged_identities = []  # old identities merged into new ones
            blacklist = api.blacklist(db)
            matching = 'email-name'  # Not active
            matcher = create_identity_matcher(matching, blacklist)

        for identity in identities:
            try:
                uuid = api.add_identity(db, backend, identity['email'],
                                        identity['name'], identity['username'])

                logger.debug("New sortinghat identity %s %s,%s,%s (%i/%i)",
                            uuid, identity['username'], identity['name'], identity['email'],
                            total, lidentities)

                profile = {"name": identity['name'] if identity['name'] else identity['username'],
                           "email": identity['email']}

                api.edit_profile(db, uuid, **profile)

                total += 1
                if not merge_identities:
                    continue  # Don't do the merge here. Too slow in large projects

                # Time to  merge
                matches = api.match_identities(db, uuid, matcher)

                if len(matches) > 1:
                    u = api.unique_identities(db, uuid)[0]
                    for m in matches:
                        # First add the old uuid to the list of changed by merge uuids
                        if m.uuid not in merged_identities:
                            merged_identities.append(m.uuid)
                        if m.uuid == uuid:
                            continue
                        # Merge matched identity into added identity
                        api.merge_unique_identities(db, m.uuid, u.uuid)
                        # uuid = m.uuid
                        # u = api.unique_identities(db, uuid, backend)[0]
                        # Include all identities related to this uuid
                        # merged_identities.append(m.uuid)

            except AlreadyExistsError as ex:
                uuid = ex.uuid
                continue
            except WrappedValueError as ex:
                logging.warning("Trying to add a None identity. Ignoring it.")
                continue
            except UnicodeEncodeError as ex:
                logging.warning("UnicodeEncodeError. Ignoring it. %s %s %s" % \
                                (identity['email'], identity['name'],
                                identity['username']))
                continue
            except Exception as ex:
                logging.warning("Unknown exception adding identity. Ignoring it. %s %s %s" % \
                                (identity['email'], identity['name'],
                                identity['username']))
                traceback.print_exc()
                continue

            if 'company' in identity and identity['company'] is not None:
                try:
                    api.add_organization(db, identity['company'])
                    api.add_enrollment(db, uuid, identity['company'],
                                       datetime(1900, 1, 1),
                                       datetime(2100, 1, 1))
                except AlreadyExistsError:
                    pass

        logger.info("Total NEW identities: %i" % (total))

        if merge_identities:
            logger.info("Total NEW identities merged: %i" % \
                        (len(merged_identities)))
            return merged_identities
        else:
            return []
Пример #16
0
    def execute(self):

        # ** START SYNC LOGIC **
        # Check that enrichment tasks are not active before loading identities
        while True:
            time.sleep(1)  # check each second if the task could start
            with TasksManager.IDENTITIES_TASKS_ON_LOCK:
                with TasksManager.NUMBER_ENRICH_TASKS_ON_LOCK:
                    enrich_tasks = TasksManager.NUMBER_ENRICH_TASKS_ON
                    logger.debug("[unify] Enrich tasks active: %i", enrich_tasks)
                    if enrich_tasks == 0:
                        # The load of identities can be started
                        TasksManager.IDENTITIES_TASKS_ON = True
                        break
        #  ** END SYNC LOGIC **

        cfg = self.config.get_conf()

        uuids_refresh = []

        for algo in cfg['sortinghat']['matching']:
            if not algo:
                # cfg['sortinghat']['matching'] is an empty list
                logger.debug('Unify not executed because empty algorithm')
                continue
            kwargs = {'matching': algo, 'fast_matching': True,
                      'strict_mapping': cfg['sortinghat']['strict_mapping']}
            logger.info("[sortinghat] Unifying identities using algorithm %s",
                        kwargs['matching'])
            self.do_unify(kwargs)

        if not cfg['sortinghat']['affiliate']:
            logger.debug("Not doing affiliation")
        else:
            # Global enrollments using domains
            logger.info("[sortinghat] Executing affiliate")
            self.do_affiliate()

        if 'autoprofile' not in cfg['sortinghat'] or \
                not cfg['sortinghat']['autoprofile'][0]:
            logger.info("[sortinghat] Autoprofile not configured. Skipping.")
        else:
            logger.info("[sortinghat] Executing autoprofile for sources: %s",
                        cfg['sortinghat']['autoprofile'])
            sources = cfg['sortinghat']['autoprofile']
            self.do_autoprofile(sources)

        if 'autogender' not in cfg['sortinghat'] or \
                not cfg['sortinghat']['autogender']:
            logger.info("[sortinghat] Autogender not configured. Skipping.")
        else:
            logger.info("[sortinghat] Executing autogender")
            self.do_autogender()

        if 'bots_names' not in cfg['sortinghat']:
            logger.info("[sortinghat] Bots name list not configured. Skipping.")
        else:
            logger.info("[sortinghat] Marking bots: %s",
                        cfg['sortinghat']['bots_names'])
            for name in cfg['sortinghat']['bots_names']:
                # First we need the uuids for the profile name
                uuids = self.__get_uuids_from_profile_name(name)
                # Then we can modify the profile setting bot flag
                profile = {"is_bot": True}
                for uuid in uuids:
                    api.edit_profile(self.db, uuid, **profile)
            # For quitting the bot flag - debug feature
            if 'no_bots_names' in cfg['sortinghat']:
                logger.info("[sortinghat] Removing Marking bots: %s",
                            cfg['sortinghat']['no_bots_names'])
                for name in cfg['sortinghat']['no_bots_names']:
                    uuids = self.__get_uuids_from_profile_name(name)
                    profile = {"is_bot": False}
                    for uuid in uuids:
                        api.edit_profile(self.db, uuid, **profile)

        with TasksManager.IDENTITIES_TASKS_ON_LOCK:
            TasksManager.IDENTITIES_TASKS_ON = False
Пример #17
0
    def test_valid_identities_with_default_matching(self):
        """Check insertion, matching and merging of valid data"""

        # First, insert the identity that will match with one
        # from the file
        api.add_organization(self.db, 'Example')
        uuid = api.add_identity(self.db, 'unknown', email='*****@*****.**')
        api.add_enrollment(self.db, uuid, 'Example',
                           datetime.datetime(2000, 1, 1, 0, 0),
                           datetime.datetime(2100, 1, 1, 0, 0))
        api.edit_profile(self.db, uuid, name='John Smith', is_bot=False,
                         country_code='US')

        parser = self.get_parser(datadir('sortinghat_valid.json'))

        code = self.cmd.import_identities(parser, matching='default')
        self.assertEqual(code, CMD_SUCCESS)

        # Check the contents of the registry
        uids = api.unique_identities(self.db)
        self.assertEqual(len(uids), 2)

        # Jane Roe
        uid = uids[0]
        self.assertEqual(uid.uuid, '17ab00ed3825ec2f50483e33c88df223264182ba')

        prf = uid.profile
        self.assertEqual(prf.uuid, '17ab00ed3825ec2f50483e33c88df223264182ba')
        self.assertEqual(prf.name, 'Jane Roe')
        self.assertEqual(prf.email, '*****@*****.**')
        self.assertEqual(prf.gender, None)
        self.assertEqual(prf.gender_acc, None)
        self.assertEqual(prf.is_bot, False)
        self.assertEqual(prf.country_code, 'US')
        self.assertEqual(prf.country.alpha3, 'USA')
        self.assertEqual(prf.country.code, 'US')
        self.assertEqual(prf.country.name, 'United States of America')

        ids = self.sort_identities(uid.identities)
        self.assertEqual(len(ids), 3)

        id0 = ids[0]
        self.assertEqual(id0.id, '17ab00ed3825ec2f50483e33c88df223264182ba')
        self.assertEqual(id0.name, 'Jane Roe')
        self.assertEqual(id0.email, '*****@*****.**')
        self.assertEqual(id0.username, 'jroe')
        self.assertEqual(id0.source, 'scm')

        id1 = ids[1]
        self.assertEqual(id1.id, '22d1b20763c6f5822bdda8508957486c547bb9de')
        self.assertEqual(id1.name, None)
        self.assertEqual(id1.email, '*****@*****.**')
        self.assertEqual(id1.username, None)
        self.assertEqual(id1.source, 'unknown')

        id2 = ids[2]
        self.assertEqual(id2.id, '322397ed782a798ffd9d0bc7e293df4292fe075d')
        self.assertEqual(id2.name, None)
        self.assertEqual(id2.email, '*****@*****.**')
        self.assertEqual(id2.username, None)
        self.assertEqual(id2.source, 'scm')

        enrollments = api.enrollments(self.db, uid.uuid)
        self.assertEqual(len(enrollments), 3)

        # John Smith
        uid = uids[1]
        self.assertEqual(uid.uuid, '2371a34a0ac65fbd9d631464ee41d583ec0e1e88')

        ids = self.sort_identities(uid.identities)
        self.assertEqual(len(ids), 3)

        # The profile was merged
        prf = uid.profile
        self.assertEqual(prf.uuid, '2371a34a0ac65fbd9d631464ee41d583ec0e1e88')
        self.assertEqual(prf.name, 'John Smith')
        self.assertEqual(prf.email, '*****@*****.**')
        self.assertEqual(prf.gender, 'male')
        self.assertEqual(prf.gender_acc, 100)
        self.assertEqual(prf.is_bot, True)
        self.assertEqual(prf.country_code, 'US')
        self.assertEqual(prf.country.code, 'US')
        self.assertEqual(prf.country.name, 'United States of America')

        id0 = ids[0]
        self.assertEqual(id0.id, '2371a34a0ac65fbd9d631464ee41d583ec0e1e88')
        self.assertEqual(id0.name, None)
        self.assertEqual(id0.email, '*****@*****.**')
        self.assertEqual(id0.username, None)
        self.assertEqual(id0.source, 'unknown')

        id1 = ids[1]
        self.assertEqual(id1.id, '880b3dfcb3a08712e5831bddc3dfe81fc5d7b331')
        self.assertEqual(id1.name, 'John Smith')
        self.assertEqual(id1.email, '*****@*****.**')
        self.assertEqual(id1.username, None)
        self.assertEqual(id1.source, 'scm')

        id2 = ids[2]
        self.assertEqual(id2.id, 'a9b403e150dd4af8953a52a4bb841051e4b705d9')
        self.assertEqual(id2.name, 'John Smith')
        self.assertEqual(id2.email, '*****@*****.**')
        self.assertEqual(id2.username, 'jsmith')
        self.assertEqual(id2.source, 'scm')

        # Enrollments were merged
        enrollments = api.enrollments(self.db, uid.uuid)
        self.assertEqual(len(enrollments), 1)

        rol0 = enrollments[0]
        self.assertEqual(rol0.organization.name, 'Example')
        self.assertEqual(rol0.start, datetime.datetime(2000, 1, 1, 0, 0))
        self.assertEqual(rol0.end, datetime.datetime(2100, 1, 1, 0, 0))
Пример #18
0
    def test_retry(self):
        """Test if a name is skipped when a connection error is returned"""

        http_requests = setup_genderize_server()

        # This profile won't be updated due to connection errors
        # In this case, a 502 HTTP error
        uuid = api.add_identity(self.db, 'scm', '*****@*****.**',
                                'Error Name')
        api.edit_profile(self.db, uuid, name="Error Name")

        # Tests
        self.cmd.autogender(api_token='abcdefghi')

        uids = api.unique_identities(self.db)

        prf = uids[0].profile
        self.assertEqual(prf.uuid, '2a9ec221b8dd5d5a85ae0e3276b8b2c3618ee15e')
        self.assertEqual(prf.gender, 'female')
        self.assertEqual(prf.gender_acc, 100)

        # Error Name profile was not updated due to connection errors
        prf = uids[1].profile
        self.assertEqual(prf.uuid, '316b78ff088c2a825defacb802013fa670fccb48')
        self.assertEqual(prf.gender, None)
        self.assertEqual(prf.gender_acc, None)

        # Jane Rae gender is not updated because it was already set
        prf = uids[2].profile
        self.assertEqual(prf.uuid, '3e1eccdb1e52ea56225f419d3e532fe9133c7821')
        self.assertEqual(prf.gender, 'unknown')
        self.assertEqual(prf.gender_acc, 100)

        prf = uids[3].profile
        self.assertEqual(prf.uuid, '539acca35c2e8502951a97d2d5af8b0857440b50')
        self.assertEqual(prf.gender, 'male')
        self.assertEqual(prf.gender_acc, 99)

        prf = uids[4].profile
        self.assertEqual(prf.uuid, 'a39ac334be9f17bfc7f9f21bbb25f389388f8e18')
        self.assertEqual(prf.gender, 'male')
        self.assertEqual(prf.gender_acc, 99)

        expected = [
            {
                'name': ['jane'],
                'apikey': ['abcdefghi']
            },
            {
                'name': ['error'],
                'apikey': ['abcdefghi']
            },
            {
                'name': ['john'],
                'apikey': ['abcdefghi']
            }
        ]

        self.assertEqual(len(http_requests), 8)

        req = http_requests[0]
        self.assertEqual(req.method, 'GET')
        self.assertEqual(req.querystring, expected[0])

        for i in range(1, 7):
            req = http_requests[i]
            self.assertEqual(req.method, 'GET')
            self.assertEqual(req.querystring, expected[1])

        req = http_requests[7]
        self.assertEqual(req.method, 'GET')
        self.assertEqual(req.querystring, expected[2])
Пример #19
0
    def test_autogender_ignore_name_not_well_formed(self):
        """Test if no gender is set when a name is invalid"""

        http_requests = setup_genderize_server()

        # These names are invalid so they will be ignored
        uuid = api.add_identity(self.db, 'scm', '*****@*****.**',
                                'Random Name')
        api.edit_profile(self.db, uuid, name="r4nd0m")

        uuid = api.add_identity(self.db, 'mls', '*****@*****.**',
                                'Another Random Name')
        api.edit_profile(self.db, uuid, name="ARadomName")

        self.cmd.autogender(api_token='abcdefghi')

        uids = api.unique_identities(self.db)

        prf = uids[0].profile
        self.assertEqual(prf.uuid, '2a9ec221b8dd5d5a85ae0e3276b8b2c3618ee15e')
        self.assertEqual(prf.gender, 'female')
        self.assertEqual(prf.gender_acc, 100)

        # Jane Rae gender is not updated because it was already set
        prf = uids[1].profile
        self.assertEqual(prf.uuid, '3e1eccdb1e52ea56225f419d3e532fe9133c7821')
        self.assertEqual(prf.gender, 'unknown')
        self.assertEqual(prf.gender_acc, 100)

        prf = uids[2].profile
        self.assertEqual(prf.uuid, '539acca35c2e8502951a97d2d5af8b0857440b50')
        self.assertEqual(prf.gender, 'male')
        self.assertEqual(prf.gender_acc, 99)

        prf = uids[3].profile
        self.assertEqual(prf.uuid, 'a39ac334be9f17bfc7f9f21bbb25f389388f8e18')
        self.assertEqual(prf.gender, 'male')
        self.assertEqual(prf.gender_acc, 99)

        # These names were ignored and their profile were not set either
        prf = uids[4].profile
        self.assertEqual(prf.uuid, 'cfa19ae04ce0c70902a31084fc75086b61ccfcf2')
        self.assertEqual(prf.gender, None)
        self.assertEqual(prf.gender_acc, None)

        prf = uids[5].profile
        self.assertEqual(prf.uuid, 'ee48da0af80479e81b846bec13fe238c06772701')
        self.assertEqual(prf.gender, None)
        self.assertEqual(prf.gender_acc, None)

        # Check requests.
        # Only two valid names were checked
        expected = [
            {
                'name': ['jane'],
                'apikey': ['abcdefghi']
            },
            {
                'name': ['john'],
                'apikey': ['abcdefghi']
            },
        ]

        self.assertEqual(len(http_requests), len(expected))

        for i in range(len(expected)):
            self.assertDictEqual(http_requests[i].querystring, expected[i])
Пример #20
0
    def test_autogender_name_not_found(self):
        """Test if no gender is set when a name is not found"""

        http_requests = setup_genderize_server()

        # This name won't be found
        uuid = api.add_identity(self.db, 'scm', '*****@*****.**',
                                'Random Name')
        api.edit_profile(self.db, uuid, name="Random Name")

        self.cmd.autogender(api_token='abcdefghi')

        uids = api.unique_identities(self.db)

        prf = uids[0].profile
        self.assertEqual(prf.uuid, '2a9ec221b8dd5d5a85ae0e3276b8b2c3618ee15e')
        self.assertEqual(prf.gender, 'female')
        self.assertEqual(prf.gender_acc, 100)

        # Jane Rae gender is not updated because it was already set
        prf = uids[1].profile
        self.assertEqual(prf.uuid, '3e1eccdb1e52ea56225f419d3e532fe9133c7821')
        self.assertEqual(prf.gender, 'unknown')
        self.assertEqual(prf.gender_acc, 100)

        prf = uids[2].profile
        self.assertEqual(prf.uuid, '539acca35c2e8502951a97d2d5af8b0857440b50')
        self.assertEqual(prf.gender, 'male')
        self.assertEqual(prf.gender_acc, 99)

        prf = uids[3].profile
        self.assertEqual(prf.uuid, 'a39ac334be9f17bfc7f9f21bbb25f389388f8e18')
        self.assertEqual(prf.gender, 'male')
        self.assertEqual(prf.gender_acc, 99)

        # This name was not found and the gender was not updated
        prf = uids[4].profile
        self.assertEqual(prf.uuid, 'cfa19ae04ce0c70902a31084fc75086b61ccfcf2')
        self.assertEqual(prf.gender, None)
        self.assertEqual(prf.gender_acc, None)

        # Check requests
        expected = [
            {
                'name': ['jane'],
                'apikey': ['abcdefghi']
            },
            {
                'name': ['john'],
                'apikey': ['abcdefghi']
            },
            {
                'name': ['random'],
                'apikey': ['abcdefghi']
            },
        ]

        self.assertEqual(len(http_requests), len(expected))

        for i in range(len(expected)):
            self.assertDictEqual(http_requests[i].querystring, expected[i])
Пример #21
0
    def execute(self):

        # ** START SYNC LOGIC **
        # Check that enrichment tasks are not active before loading identities
        while True:
            time.sleep(
                10)  # check each 10 seconds if the identities load could start
            with TasksManager.IDENTITIES_TASKS_ON_LOCK:
                with TasksManager.NUMBER_ENRICH_TASKS_ON_LOCK:
                    enrich_tasks = TasksManager.NUMBER_ENRICH_TASKS_ON
                    logger.debug("Enrich tasks active: %i", enrich_tasks)
                    if enrich_tasks == 0:
                        # The load of identities can be started
                        TasksManager.IDENTITIES_TASKS_ON = True
                        break
        #  ** END SYNC LOGIC **

        cfg = self.config.get_conf()

        uuids_refresh = []

        if self.unify:
            for algo in cfg['sortinghat']['matching']:
                if not algo:
                    # cfg['sortinghat']['matching'] is an empty list
                    logger.debug('Unify not executed because empty algorithm')
                    continue
                kwargs = {
                    'matching': algo,
                    'fast_matching': True,
                    'strict_mapping': cfg['sortinghat']['strict_mapping']
                }
                logger.info(
                    "[sortinghat] Unifying identities using algorithm %s",
                    kwargs['matching'])
                uuids = self.do_unify(kwargs)
                uuids_refresh += uuids
                logger.debug("uuids to refresh from unify: %s", uuids)

        if self.affiliate:
            if not cfg['sortinghat']['affiliate']:
                logger.debug("Not doing affiliation")
            else:
                # Global enrollments using domains
                logger.info("[sortinghat] Executing affiliate")
                uuids = self.do_affiliate()
                uuids_refresh += uuids
                logger.debug("uuids to refresh from affiliate: %s", uuids)

        if self.autoprofile:
            # autoprofile = [] -> cfg['sortinghat']['autoprofile'][0] = ['']
            if ('autoprofile' not in cfg['sortinghat']
                    or not cfg['sortinghat']['autoprofile'][0]):
                logger.info(
                    "[sortinghat] Autoprofile not configured. Skipping.")
            else:
                logger.info(
                    "[sortinghat] Executing autoprofile for sources: %s",
                    cfg['sortinghat']['autoprofile'])
                sources = cfg['sortinghat']['autoprofile']
                self.do_autoprofile(sources)

        # The uuids must be refreshed in all backends (data sources)
        # Give 5s so the queue is filled and if not, continue without it
        try:
            autorefresh_backends_uuids = TasksManager.UPDATED_UUIDS_QUEUE.get(
                timeout=5)
            for backend_section in autorefresh_backends_uuids:
                autorefresh_backends_uuids[backend_section] += uuids_refresh
            TasksManager.UPDATED_UUIDS_QUEUE.put(autorefresh_backends_uuids)
            logger.debug(
                "Autorefresh uuids queue after processing identities: %s",
                autorefresh_backends_uuids)
        except Empty:
            logger.warning(
                "Autorefresh uuids not active because the queue for it is empty."
            )

        if self.bots:
            if 'bots_names' not in cfg['sortinghat']:
                logger.info(
                    "[sortinghat] Bots name list not configured. Skipping.")
            else:
                logger.info("[sortinghat] Marking bots: %s",
                            cfg['sortinghat']['bots_names'])
                for name in cfg['sortinghat']['bots_names']:
                    # First we need the uuids for the profile name
                    uuids = self.__get_uuids_from_profile_name(name)
                    # Then we can modify the profile setting bot flag
                    profile = {"is_bot": True}
                    for uuid in uuids:
                        api.edit_profile(self.db, uuid, **profile)
                # For quitting the bot flag - debug feature
                if 'no_bots_names' in cfg['sortinghat']:
                    logger.info("[sortinghat] Removing Marking bots: %s",
                                cfg['sortinghat']['no_bots_names'])
                    for name in cfg['sortinghat']['no_bots_names']:
                        uuids = self.__get_uuids_from_profile_name(name)
                        profile = {"is_bot": False}
                        for uuid in uuids:
                            api.edit_profile(self.db, uuid, **profile)

        # Autorefresh must be done once identities processing has finished
        # Give 5s so the queue is filled and if not, continue without it
        try:
            autorefresh_backends = TasksManager.AUTOREFRESH_QUEUE.get(
                timeout=5)
            for backend_section in autorefresh_backends:
                autorefresh_backends[backend_section] = True
            TasksManager.AUTOREFRESH_QUEUE.put(autorefresh_backends)
            logger.debug("Autorefresh queue after processing identities: %s",
                         autorefresh_backends)
        except Empty:
            logger.warning(
                "Autorefresh not active because the queue for it is empty.")

        with TasksManager.IDENTITIES_TASKS_ON_LOCK:
            TasksManager.IDENTITIES_TASKS_ON = False
Пример #22
0
    def test_valid_identities_with_default_matching(self):
        """Check insertion, matching and merging of valid data"""

        # First, insert the identity that will match with one
        # from the file
        api.add_organization(self.db, 'Example')
        uuid = api.add_identity(self.db, 'unknown', email='*****@*****.**')
        api.add_enrollment(self.db, uuid, 'Example',
                           datetime.datetime(2000, 1, 1, 0, 0),
                           datetime.datetime(2100, 1, 1, 0, 0))
        api.edit_profile(self.db, uuid, name='John Smith', is_bot=False,
                         country_code='US')

        parser = self.get_parser('data/sortinghat_valid.json')

        code = self.cmd.import_identities(parser, matching='default')
        self.assertEqual(code, CMD_SUCCESS)

        # Check the contents of the registry
        uids = api.unique_identities(self.db)
        self.assertEqual(len(uids), 2)

        # John Smith
        uid = uids[0]
        self.assertEqual(uid.uuid, '23fe3a011190a27a7c5cf6f8925de38ff0994d8d')

        ids = self.sort_identities(uid.identities)
        self.assertEqual(len(ids), 3)

        # The profile was merged
        prf = uid.profile
        self.assertEqual(prf.uuid, '23fe3a011190a27a7c5cf6f8925de38ff0994d8d')
        self.assertEqual(prf.name, 'John Smith')
        self.assertEqual(prf.email, '*****@*****.**')
        self.assertEqual(prf.is_bot, True)
        self.assertEqual(prf.country_code, 'US')
        self.assertEqual(prf.country.code, 'US')
        self.assertEqual(prf.country.name, 'United States of America')

        id0 = ids[0]
        self.assertEqual(id0.id, '03e12d00e37fd45593c49a5a5a1652deca4cf302')
        self.assertEqual(id0.name, 'John Smith')
        self.assertEqual(id0.email, '*****@*****.**')
        self.assertEqual(id0.username, 'jsmith')
        self.assertEqual(id0.source, 'scm')

        id1 = ids[1]
        self.assertEqual(id1.id, '23fe3a011190a27a7c5cf6f8925de38ff0994d8d')
        self.assertEqual(id1.name, None)
        self.assertEqual(id1.email, '*****@*****.**')
        self.assertEqual(id1.username, None)
        self.assertEqual(id1.source, 'unknown')

        id2 = ids[2]
        self.assertEqual(id2.id, '75d95d6c8492fd36d24a18bd45d62161e05fbc97')
        self.assertEqual(id2.name, 'John Smith')
        self.assertEqual(id2.email, '*****@*****.**')
        self.assertEqual(id2.username, None)
        self.assertEqual(id2.source, 'scm')

        # Enrollments were merged
        enrollments = api.enrollments(self.db, uid.uuid)
        self.assertEqual(len(enrollments), 1)

        rol0 = enrollments[0]
        self.assertEqual(rol0.organization.name, 'Example')
        self.assertEqual(rol0.start, datetime.datetime(2000, 1, 1, 0, 0))
        self.assertEqual(rol0.end, datetime.datetime(2100, 1, 1, 0, 0))

        # Jane Roe
        uid = uids[1]
        self.assertEqual(uid.uuid, '52e0aa0a14826627e633fd15332988686b730ab3')

        prf = uid.profile
        self.assertEqual(prf.uuid, '52e0aa0a14826627e633fd15332988686b730ab3')
        self.assertEqual(prf.name, 'Jane Roe')
        self.assertEqual(prf.email, '*****@*****.**')
        self.assertEqual(prf.is_bot, False)
        self.assertEqual(prf.country_code, 'US')
        self.assertEqual(prf.country.alpha3, 'USA')
        self.assertEqual(prf.country.code, 'US')
        self.assertEqual(prf.country.name, 'United States of America')

        ids = self.sort_identities(uid.identities)
        self.assertEqual(len(ids), 3)

        id0 = ids[0]
        self.assertEqual(id0.id, '52e0aa0a14826627e633fd15332988686b730ab3')
        self.assertEqual(id0.name, 'Jane Roe')
        self.assertEqual(id0.email, '*****@*****.**')
        self.assertEqual(id0.username, 'jroe')
        self.assertEqual(id0.source, 'scm')

        id1 = ids[1]
        self.assertEqual(id1.id, 'cbfb7bd31d556322c640f5bc7b31d58a12b15904')
        self.assertEqual(id1.name, None)
        self.assertEqual(id1.email, '*****@*****.**')
        self.assertEqual(id1.username, None)
        self.assertEqual(id1.source, 'unknown')

        id2 = ids[2]
        self.assertEqual(id2.id, 'fef873c50a48cfc057f7aa19f423f81889a8907f')
        self.assertEqual(id2.name, None)
        self.assertEqual(id2.email, '*****@*****.**')
        self.assertEqual(id2.username, None)
        self.assertEqual(id2.source, 'scm')

        enrollments = api.enrollments(self.db, uid.uuid)
        self.assertEqual(len(enrollments), 3)