class TestBaseCase(unittest.TestCase):
    """Shared fixture for the countries command unit tests.

    ``setUp`` stores three countries (ES, US, GB) through a database
    session and builds a ``Countries`` command with the test connection
    settings; ``tearDown`` calls ``clear()`` on the database handle.
    """

    def setUp(self):
        """Check buffered mode, load the dataset and create the command."""
        # The guard only fires when neither stream exposes getvalue()
        buffered = (hasattr(sys.stdout, 'getvalue')
                    or hasattr(sys.stderr, 'getvalue'))
        if not buffered:
            self.fail('This test needs to be run in buffered mode')

        self.db = Database(DB_USER, DB_PASSWORD, DB_NAME, DB_HOST, DB_PORT)
        self._load_test_dataset()

        # Connection settings handed to the command under test
        self.kwargs = dict(user=DB_USER,
                           password=DB_PASSWORD,
                           database=DB_NAME,
                           host=DB_HOST,
                           port=DB_PORT)
        self.cmd = Countries(**self.kwargs)

    def tearDown(self):
        """Clear the database handle created in ``setUp``."""
        self.db.clear()

    def _load_test_dataset(self):
        """Add the ES, US and GB countries, in that order, to a session."""
        with self.db.connect() as session:
            united_states = Country(code='US',
                                    name='United States of America',
                                    alpha3='USA')
            spain = Country(code='ES', name='Spain', alpha3='ESP')
            united_kingdom = Country(code='GB',
                                     name='United Kingdom',
                                     alpha3='GBR')

            for country in (spain, united_states, united_kingdom):
                session.add(country)
class TestBaseCase(unittest.TestCase):
    """Common fixture used by the countries unit tests.

    Every test gets a database handle, three stored countries and a
    ``Countries`` command built from the test connection settings.
    """

    def setUp(self):
        """Require buffered output, then prepare database and command."""
        streams = (sys.stdout, sys.stderr)
        # Fail only when none of the streams provides getvalue()
        if not any(hasattr(stream, 'getvalue') for stream in streams):
            self.fail('This test needs to be run in buffered mode')

        self.db = Database(DB_USER, DB_PASSWORD, DB_NAME, DB_HOST, DB_PORT)
        self._load_test_dataset()

        # Settings used to create the command
        self.kwargs = dict(user=DB_USER, password=DB_PASSWORD,
                           database=DB_NAME, host=DB_HOST, port=DB_PORT)
        self.cmd = Countries(**self.kwargs)

    def _load_test_dataset(self):
        """Create the US, ES and GB countries and add them to a session."""
        rows = (
            ('US', 'United States of America', 'USA'),
            ('ES', 'Spain', 'ESP'),
            ('GB', 'United Kingdom', 'GBR'),
        )
        with self.db.connect() as session:
            us, es, gb = (Country(code=code, name=name, alpha3=alpha3)
                          for code, name, alpha3 in rows)

            # Objects are added in ES, US, GB order
            session.add(es)
            session.add(us)
            session.add(gb)

    def tearDown(self):
        """Call ``clear()`` on the database handle after each test."""
        self.db.clear()
Exemplo n.º 3
0
class TestBaseCase(unittest.TestCase):
    """Shared fixture for the profile command unit tests.

    ``setUp`` stores one country and one identity and creates a
    ``Profile`` command; ``tearDown`` calls ``clear()`` on the database.
    """

    def setUp(self):
        """Require buffered stdout, load the dataset, build the command."""
        stdout_is_buffered = hasattr(sys.stdout, 'getvalue')
        if not stdout_is_buffered:
            self.fail('This test needs to be run in buffered mode')

        # Handle the tests use to check the contents of the registry
        self.db = Database(DB_USER, DB_PASSWORD, DB_NAME, DB_HOST, DB_PORT)

        # Predefined dataset for testing
        self._load_test_dataset()

        # Command under test, built from the same connection settings
        self.kwargs = dict(user=DB_USER,
                           password=DB_PASSWORD,
                           database=DB_NAME,
                           host=DB_HOST,
                           port=DB_PORT)
        self.cmd = Profile(**self.kwargs)

    def tearDown(self):
        """Clear the database handle created in ``setUp``."""
        self.db.clear()

    def _load_test_dataset(self):
        """Add the US country and the 'Jane Roe' identity."""
        # Country
        with self.db.connect() as session:
            session.add(Country(code='US',
                                name='United States of America',
                                alpha3='USA'))

        # Identity
        api.add_identity(self.db, 'scm', '*****@*****.**', 'Jane Roe', 'jroe')
Exemplo n.º 4
0
class TestBaseCase(unittest.TestCase):
    """Shared fixture for unit tests that exercise the ``Merge`` command.

    ``setUp`` clears the database, loads two unique identities with
    their profiles and enrollments plus three organizations, and creates
    the command; ``tearDown`` clears the database again.
    """

    def setUp(self):
        """Require buffered output, reset the registry, build the command."""
        # The guard only fires when neither stream exposes getvalue()
        buffered = (hasattr(sys.stdout, 'getvalue')
                    or hasattr(sys.stderr, 'getvalue'))
        if not buffered:
            self.fail('This test needs to be run in buffered mode')

        # Handle the tests use to check the contents of the registry
        self.db = Database(DB_USER, DB_PASSWORD, DB_NAME, DB_HOST, DB_PORT)

        # Start from a cleared database before loading the dataset
        self.db.clear()
        self._load_test_dataset()

        # Command under test
        self.kwargs = dict(user=DB_USER,
                           password=DB_PASSWORD,
                           database=DB_NAME,
                           host=DB_HOST,
                           port=DB_PORT)
        self.cmd = Merge(**self.kwargs)

    def tearDown(self):
        """Clear the database handle created in ``setUp``."""
        self.db.clear()

    def _load_test_dataset(self):
        """Store the country, identities, profiles and enrollments."""
        smith = 'John Smith'
        doe = 'John Doe'
        masked_email = '*****@*****.**'

        # Country
        with self.db.connect() as session:
            session.add(Country(code='US',
                                name='United States of America',
                                alpha3='USA'))

        # John Smith: two identities and a non-bot profile
        api.add_unique_identity(self.db, smith)
        api.add_identity(self.db, 'scm', masked_email, uuid=smith)
        api.add_identity(self.db, 'scm', masked_email, smith, uuid=smith)
        api.edit_profile(self.db, smith, name=smith, is_bot=False)

        # John Doe: one identity and a bot profile located in the US
        api.add_unique_identity(self.db, doe)
        api.add_identity(self.db, 'scm', masked_email, uuid=doe)
        api.edit_profile(self.db, doe,
                         email=masked_email,
                         is_bot=True,
                         country_code='US')

        # Both are enrolled in Example
        api.add_organization(self.db, 'Example')
        api.add_enrollment(self.db, smith, 'Example')
        api.add_enrollment(self.db, doe, 'Example')

        # Both are enrolled in Bitergia; Doe only during 1999
        api.add_organization(self.db, 'Bitergia')
        api.add_enrollment(self.db, smith, 'Bitergia')
        period_start = datetime.datetime(1999, 1, 1)
        period_end = datetime.datetime(2000, 1, 1)
        api.add_enrollment(self.db, doe, 'Bitergia', period_start, period_end)

        # Organization without enrollments
        api.add_organization(self.db, 'LibreSoft')
Exemplo n.º 5
0
class TestBaseCase(unittest.TestCase):
    """Shared fixture and helpers for unit tests of the ``Load`` command.

    ``setUp`` stores one country and creates the command; ``tearDown``
    calls ``clear()`` on the database handle.
    """

    def setUp(self):
        """Require buffered stdout, load the dataset, build the command."""
        stdout_is_buffered = hasattr(sys.stdout, 'getvalue')
        if not stdout_is_buffered:
            self.fail('This test needs to be run in buffered mode')

        # Handle the tests use to check the contents of the registry
        self.db = Database(DB_USER, DB_PASSWORD, DB_NAME, DB_HOST, DB_PORT)
        self._load_test_dataset()

        # Command under test
        self.kwargs = dict(user=DB_USER, password=DB_PASSWORD,
                           database=DB_NAME, host=DB_HOST, port=DB_PORT)
        self.cmd = Load(**self.kwargs)

    def tearDown(self):
        """Clear the database handle created in ``setUp``."""
        self.db.clear()

    def _load_test_dataset(self):
        """Add the US country through a database session."""
        with self.db.connect() as session:
            session.add(Country(code='US',
                                name='United States of America',
                                alpha3='USA'))

    def get_parser(self, filename):
        """Read ``filename`` as UTF-8 text and wrap it in a SortingHatParser."""
        running_python2 = sys.version_info[0] < 3
        if running_python2:
            # Python 2: read raw bytes and decode them by hand
            with open(filename, 'r') as stream:
                text = stream.read().decode('UTF-8')
        else:
            # Python 3: let open() decode the file
            with open(filename, 'r', encoding='UTF-8') as stream:
                text = stream.read()
        return SortingHatParser(text)

    def sort_identities(self, ids):
        """Return a new list with ``ids`` ordered by their ``id`` attribute."""
        ordered = list(ids)
        ordered.sort(key=lambda identity: identity.id)
        return ordered
Exemplo n.º 6
0
class TestBaseCase(unittest.TestCase):
    """Common fixture and helper methods for the ``Load`` command tests."""

    def setUp(self):
        """Check buffered stdout, prepare the database and the command."""
        if not hasattr(sys.stdout, 'getvalue'):
            self.fail('This test needs to be run in buffered mode')

        # Handle the tests use to check the contents of the registry
        self.db = Database(DB_USER, DB_PASSWORD, DB_NAME, DB_HOST, DB_PORT)
        self._load_test_dataset()

        # Command under test
        settings = dict(user=DB_USER,
                        password=DB_PASSWORD,
                        database=DB_NAME,
                        host=DB_HOST,
                        port=DB_PORT)
        self.kwargs = settings
        self.cmd = Load(**self.kwargs)

    def _load_test_dataset(self):
        """Store the US country using a database session."""
        united_states = dict(code='US',
                             name='United States of America',
                             alpha3='USA')
        with self.db.connect() as session:
            session.add(Country(**united_states))

    def tearDown(self):
        """Call ``clear()`` on the database handle after each test."""
        self.db.clear()

    def get_parser(self, filename):
        """Build a SortingHatParser from the UTF-8 content of ``filename``."""
        major = sys.version_info[0]
        if major >= 3:
            # Python 3 decodes while reading
            with open(filename, 'r', encoding='UTF-8') as handle:
                content = handle.read()
            return SortingHatParser(content)

        # Python 2 needs an explicit decode of the bytes read
        with open(filename, 'r') as handle:
            content = handle.read().decode('UTF-8')
        return SortingHatParser(content)

    def sort_identities(self, ids):
        """Return the items of ``ids`` sorted by their ``id`` attribute."""
        def identity_id(identity):
            return identity.id

        return sorted(ids, key=identity_id)
Exemplo n.º 7
0
class TestBaseCase(unittest.TestCase):
    """Common fixture used by the ``Profile`` command unit tests."""

    def setUp(self):
        """Check buffered stdout, load the dataset and create the command."""
        if not hasattr(sys.stdout, 'getvalue'):
            self.fail('This test needs to be run in buffered mode')

        # Handle the tests use to check the contents of the registry
        self.db = Database(DB_USER, DB_PASSWORD, DB_NAME, DB_HOST, DB_PORT)

        # Predefined dataset for testing
        self._load_test_dataset()

        # Command under test
        self.kwargs = dict(user=DB_USER, password=DB_PASSWORD,
                           database=DB_NAME, host=DB_HOST, port=DB_PORT)
        self.cmd = Profile(**self.kwargs)

    def _load_test_dataset(self):
        """Store the US country and the 'Jane Roe' identity."""
        country_fields = dict(code='US',
                              name='United States of America',
                              alpha3='USA')
        with self.db.connect() as session:
            session.add(Country(**country_fields))

        # Identity
        identity_args = ('scm', '*****@*****.**', 'Jane Roe', 'jroe')
        api.add_identity(self.db, *identity_args)

    def tearDown(self):
        """Call ``clear()`` on the database handle after each test."""
        self.db.clear()
Exemplo n.º 8
0
class TaskIdentitiesMerge(Task):
    """Task that unifies, affiliates and profiles identities in SortingHat.

    Unify, affiliate and autoprofile are run through the ``sortinghat``
    command line tool; bot flags are set with ``api.edit_profile``. The
    uuids reported by unify and affiliate are appended to every entry of
    the dict taken from ``TasksManager.UPDATED_UUIDS_QUEUE``.
    """
    def __init__(self,
                 conf,
                 load_orgs=True,
                 load_ids=True,
                 unify=True,
                 autoprofile=True,
                 affiliate=True,
                 bots=True):
        """Store the enabled steps and create the database handle.

        :param conf: configuration object forwarded to ``Task.__init__``
        :param load_orgs: accepted but not used by this class
        :param load_ids: stored as ``self.load_ids``
        :param unify: when true, ``execute`` runs the unify step
        :param autoprofile: when true, ``execute`` runs the autoprofile step
        :param affiliate: when true, ``execute`` runs the affiliate step
        :param bots: when true, ``execute`` updates the bot flags
        """
        super().__init__(conf)

        self.load_ids = load_ids  # Load identities from raw index
        self.unify = unify  # Unify identities
        self.autoprofile = autoprofile  # Execute autoprofile
        self.affiliate = affiliate  # Affiliate identities
        self.bots = bots  # Mark bots in SH
        self.sh_kwargs = {
            'user': self.db_user,
            'password': self.db_password,
            'database': self.db_sh,
            'host': self.db_host,
            'port': None
        }
        self.db = Database(**self.sh_kwargs)

    def is_backend_task(self):
        """Return False: this task reports itself as not a backend task."""
        return False

    def __get_uuids_from_profile_name(self, profile_name):
        """Return the uuids of all profiles whose name is ``profile_name``."""
        uuids = []

        with self.db.connect() as session:
            query = session.query(Profile).\
                filter(Profile.name == profile_name)
            uuids = [profile.uuid for profile in query.all()]
        return uuids

    def __build_sh_command(self):
        """Return the ``sortinghat`` argument list with connection options.

        NOTE(review): the database password is passed as a command line
        argument, so it is likely visible in the process list while the
        command runs. Left unchanged because it is how the tool is invoked
        here.
        """
        cfg = self.config.get_conf()

        db_user = cfg['sortinghat']['user']
        db_password = cfg['sortinghat']['password']
        db_host = cfg['sortinghat']['host']
        db_name = cfg['sortinghat']['database']
        cmd = [
            'sortinghat', '-u', db_user, '-p', db_password, '--host', db_host,
            '-d', db_name
        ]

        return cmd

    def __execute_sh_command(self, cmd):
        """Run ``cmd`` and return the uuids found in its standard output.

        :param cmd: argument list to execute
        :returns: list of uuids parsed from the output, or an empty list
            when the command exits with a non-zero code
        """
        logger.debug("Executing %s", cmd)
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        outs, _ = proc.communicate()
        # Check the exit code first: the output of a failed command is
        # discarded, so there is no point in parsing it
        if proc.returncode != 0:
            logger.error("[sortinghat] Error in command %s", cmd)
            return []
        return self.__get_uuids_to_refresh(outs.decode("utf8"))

    def __get_uuids_to_refresh(self, data):
        """
        Return the SortingHat unique identifiers that must be refreshed
        after a unify and affiliate command

        Formats:
        Unique identity ab882b9c6f29837b263448aeb6eab1ec373d7688 merged on 75fc28ef4643de5323e89fb26e4e67c97b24f507
        Unique identity 12deb94aa946193e28c2a933cbee4b338a928042 (acs_at_bitergia.com) affiliated to Bitergia

        :param data: text output of the command, or None
        :returns: list of uuids; empty when ``data`` is None. Lines that
            mention 'merged' or 'affiliated' but are too short to hold the
            expected fields are ignored
        """

        if data is None:
            # Callers concatenate the result, so always hand back a list
            return []

        uuids = []
        for line in data.split("\n"):
            fields = line.split()
            if 'merged' in line:
                if len(fields) < 6:
                    continue
                uuids.append(fields[2])
                if fields[5] not in uuids:
                    uuids.append(fields[5])
            elif 'affiliated' in line:
                if len(fields) < 3:
                    continue
                uuids.append(fields[2])
        return uuids

    def do_affiliate(self):
        """Run ``sortinghat affiliate`` and return the uuids it reports."""
        cmd = self.__build_sh_command()
        cmd += ['affiliate']
        uuids = self.__execute_sh_command(cmd)
        return uuids

    def do_autoprofile(self, sources):
        """Run ``sortinghat autoprofile`` for ``sources``; return None."""
        cmd = self.__build_sh_command()
        cmd += ['autoprofile'] + sources
        self.__execute_sh_command(cmd)
        return None

    def do_unify(self, kwargs):
        """Run ``sortinghat unify`` and return the uuids it reports.

        :param kwargs: dict with the keys 'matching' (algorithm name) and
            'strict_mapping' (when false, '--no-strict-matching' is added)
        """
        cmd = self.__build_sh_command()
        cmd += ['unify', '--fast-matching', '-m', kwargs['matching']]
        if not kwargs['strict_mapping']:
            cmd += ['--no-strict-matching']
        uuids = self.__execute_sh_command(cmd)
        return uuids

    def __set_bot_flag(self, names, is_bot):
        """Set ``is_bot`` on every profile whose name is in ``names``."""
        for name in names:
            # First we need the uuids for the profile name
            uuids = self.__get_uuids_from_profile_name(name)
            # Then we can modify the profile setting bot flag
            profile = {"is_bot": is_bot}
            for uuid in uuids:
                api.edit_profile(self.db, uuid, **profile)

    def __process_identities(self):
        """Run the enabled steps and update the autorefresh queues."""
        cfg = self.config.get_conf()

        uuids_refresh = []

        if self.unify:
            for algo in cfg['sortinghat']['matching']:
                if not algo:
                    # cfg['sortinghat']['matching'] is an empty list
                    logger.debug('Unify not executed because empty algorithm')
                    continue
                kwargs = {
                    'matching': algo,
                    'fast_matching': True,
                    'strict_mapping': cfg['sortinghat']['strict_mapping']
                }
                logger.info(
                    "[sortinghat] Unifying identities using algorithm %s",
                    kwargs['matching'])
                uuids = self.do_unify(kwargs)
                uuids_refresh += uuids
                logger.debug("uuids to refresh from unify: %s", uuids)

        if self.affiliate:
            if not cfg['sortinghat']['affiliate']:
                logger.debug("Not doing affiliation")
            else:
                # Global enrollments using domains
                logger.info("[sortinghat] Executing affiliate")
                uuids = self.do_affiliate()
                uuids_refresh += uuids
                logger.debug("uuids to refresh from affiliate: %s", uuids)

        if self.autoprofile:
            # autoprofile = [] -> cfg['sortinghat']['autoprofile'][0] = ['']
            sources = None
            if 'autoprofile' in cfg['sortinghat']:
                sources = cfg['sortinghat']['autoprofile']
            # A missing, empty or ['']-style value all mean "not configured"
            if not sources or not sources[0]:
                logger.info(
                    "[sortinghat] Autoprofile not configured. Skipping.")
            else:
                logger.info(
                    "[sortinghat] Executing autoprofile for sources: %s",
                    sources)
                self.do_autoprofile(sources)

        # The uuids must be refreshed in all backends (data sources)
        # Give 5s so the queue is filled and if not, continue without it
        try:
            autorefresh_backends_uuids = TasksManager.UPDATED_UUIDS_QUEUE.get(
                timeout=5)
            for backend_section in autorefresh_backends_uuids:
                autorefresh_backends_uuids[backend_section] += uuids_refresh
            TasksManager.UPDATED_UUIDS_QUEUE.put(autorefresh_backends_uuids)
            logger.debug(
                "Autorefresh uuids queue after processing identities: %s",
                autorefresh_backends_uuids)
        except Empty:
            logger.warning(
                "Autorefresh uuids not active because the queue for it is empty."
            )

        if self.bots:
            if 'bots_names' not in cfg['sortinghat']:
                logger.info(
                    "[sortinghat] Bots name list not configured. Skipping.")
            else:
                logger.info("[sortinghat] Marking bots: %s",
                            cfg['sortinghat']['bots_names'])
                self.__set_bot_flag(cfg['sortinghat']['bots_names'], True)
                # For quitting the bot flag - debug feature
                if 'no_bots_names' in cfg['sortinghat']:
                    logger.info("[sortinghat] Removing Marking bots: %s",
                                cfg['sortinghat']['no_bots_names'])
                    self.__set_bot_flag(cfg['sortinghat']['no_bots_names'],
                                        False)

        # Autorefresh must be done once identities processing has finished
        # Give 5s so the queue is filled and if not, continue without it
        try:
            autorefresh_backends = TasksManager.AUTOREFRESH_QUEUE.get(
                timeout=5)
            for backend_section in autorefresh_backends:
                autorefresh_backends[backend_section] = True
            TasksManager.AUTOREFRESH_QUEUE.put(autorefresh_backends)
            logger.debug("Autorefresh queue after processing identities: %s",
                         autorefresh_backends)
        except Empty:
            logger.warning(
                "Autorefresh not active because the queue for it is empty.")

    def execute(self):
        """Wait until no enrich task is active, then process identities.

        ``TasksManager.IDENTITIES_TASKS_ON`` is set to True while the
        identities are processed and is always set back to False
        afterwards, even when one of the steps raises.
        """

        # ** START SYNC LOGIC **
        # Check that enrichment tasks are not active before loading identities
        while True:
            time.sleep(
                10)  # check each 10 seconds if the identities load could start
            with TasksManager.IDENTITIES_TASKS_ON_LOCK:
                with TasksManager.NUMBER_ENRICH_TASKS_ON_LOCK:
                    enrich_tasks = TasksManager.NUMBER_ENRICH_TASKS_ON
                    logger.debug("Enrich tasks active: %i", enrich_tasks)
                    if enrich_tasks == 0:
                        # The load of identities can be started
                        TasksManager.IDENTITIES_TASKS_ON = True
                        break
        #  ** END SYNC LOGIC **

        try:
            self.__process_identities()
        finally:
            # Reset the flag on every exit path; leaving it set after an
            # error would keep reporting identities tasks as running
            with TasksManager.IDENTITIES_TASKS_ON_LOCK:
                TasksManager.IDENTITIES_TASKS_ON = False
Exemplo n.º 9
0
class TaskIdentitiesMerge(Task):
    """ Task for processing identities in SortingHat

    Unify, affiliate, autoprofile and autogender are run through the
    ``sortinghat`` command line tool; bot flags are set with
    ``api.edit_profile``.
    """
    def __init__(self, conf):
        """Create the database handle and record the creation time.

        :param conf: configuration object forwarded to ``Task.__init__``
        """
        super().__init__(conf)
        # NOTE(review): self.sh_kwargs is not set in this class; presumably
        # it is provided by Task - confirm.
        self.db = Database(**self.sh_kwargs)
        self.last_autorefresh = datetime.utcnow()  # Last autorefresh date

    def is_backend_task(self):
        """Return False: this task reports itself as not a backend task."""
        return False

    def __get_uuids_from_profile_name(self, profile_name):
        """ Get the uuids of all profiles whose name is profile_name """
        uuids = []

        with self.db.connect() as session:
            query = session.query(Profile).\
                filter(Profile.name == profile_name)
            uuids = [profile.uuid for profile in query.all()]
        return uuids

    def __build_sh_command(self):
        """Return the ``sortinghat`` argument list with connection options.

        NOTE(review): the database password is passed as a command line
        argument, so it is likely visible in the process list while the
        command runs. Left unchanged because it is how the tool is invoked
        here.
        """
        cfg = self.config.get_conf()

        db_user = cfg['sortinghat']['user']
        db_password = cfg['sortinghat']['password']
        db_host = cfg['sortinghat']['host']
        db_name = cfg['sortinghat']['database']
        cmd = [
            'sortinghat', '-u', db_user, '-p', db_password, '--host', db_host,
            '-d', db_name
        ]

        return cmd

    def __execute_sh_command(self, cmd):
        """Run ``cmd``, log an error on failure and return its exit code."""
        logger.debug("Executing %s", cmd)
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        # The output is not used, but the pipe must be drained so the
        # child process can finish
        proc.communicate()
        return_code = proc.returncode
        if return_code != 0:
            logger.error("[sortinghat] Error in command %s", cmd)
        return return_code

    def do_affiliate(self):
        """Run ``sortinghat affiliate``; return None."""
        cmd = self.__build_sh_command()
        cmd += ['affiliate']
        self.__execute_sh_command(cmd)
        return

    def do_autogender(self):
        """Run ``sortinghat autogender``; return None."""
        cmd = self.__build_sh_command()
        cmd += ['autogender']
        self.__execute_sh_command(cmd)
        return None

    def do_autoprofile(self, sources):
        """Run ``sortinghat autoprofile`` for ``sources``; return None."""
        cmd = self.__build_sh_command()
        cmd += ['autoprofile'] + sources
        self.__execute_sh_command(cmd)
        return None

    def do_unify(self, kwargs):
        """Run ``sortinghat unify``; return None.

        :param kwargs: dict with the keys 'matching' (algorithm name) and
            'strict_mapping' (when false, '--no-strict-matching' is added)
        """
        cmd = self.__build_sh_command()
        cmd += ['unify', '--fast-matching', '-m', kwargs['matching']]
        if not kwargs['strict_mapping']:
            cmd += ['--no-strict-matching']
        self.__execute_sh_command(cmd)
        return

    def __set_bot_flag(self, names, is_bot):
        """Set ``is_bot`` on every profile whose name is in ``names``."""
        for name in names:
            # First we need the uuids for the profile name
            uuids = self.__get_uuids_from_profile_name(name)
            # Then we can modify the profile setting bot flag
            profile = {"is_bot": is_bot}
            for uuid in uuids:
                api.edit_profile(self.db, uuid, **profile)

    def __process_identities(self):
        """Run unify, affiliate, autoprofile, autogender and bot marking."""
        cfg = self.config.get_conf()

        for algo in cfg['sortinghat']['matching']:
            if not algo:
                # cfg['sortinghat']['matching'] is an empty list
                logger.debug('Unify not executed because empty algorithm')
                continue
            kwargs = {
                'matching': algo,
                'fast_matching': True,
                'strict_mapping': cfg['sortinghat']['strict_mapping']
            }
            logger.info("[sortinghat] Unifying identities using algorithm %s",
                        kwargs['matching'])
            self.do_unify(kwargs)

        if not cfg['sortinghat']['affiliate']:
            logger.debug("Not doing affiliation")
        else:
            # Global enrollments using domains
            logger.info("[sortinghat] Executing affiliate")
            self.do_affiliate()

        sources = None
        if 'autoprofile' in cfg['sortinghat']:
            sources = cfg['sortinghat']['autoprofile']
        # A missing, empty or ['']-style value all mean "not configured"
        if not sources or not sources[0]:
            logger.info("[sortinghat] Autoprofile not configured. Skipping.")
        else:
            logger.info("[sortinghat] Executing autoprofile for sources: %s",
                        sources)
            self.do_autoprofile(sources)

        if 'autogender' not in cfg['sortinghat'] or \
                not cfg['sortinghat']['autogender']:
            logger.info("[sortinghat] Autogender not configured. Skipping.")
        else:
            logger.info("[sortinghat] Executing autogender")
            self.do_autogender()

        if 'bots_names' not in cfg['sortinghat']:
            logger.info(
                "[sortinghat] Bots name list not configured. Skipping.")
        else:
            logger.info("[sortinghat] Marking bots: %s",
                        cfg['sortinghat']['bots_names'])
            self.__set_bot_flag(cfg['sortinghat']['bots_names'], True)
            # For quitting the bot flag - debug feature
            if 'no_bots_names' in cfg['sortinghat']:
                logger.info("[sortinghat] Removing Marking bots: %s",
                            cfg['sortinghat']['no_bots_names'])
                self.__set_bot_flag(cfg['sortinghat']['no_bots_names'], False)

    def execute(self):
        """Wait until no enrich task is active, then process identities.

        ``TasksManager.IDENTITIES_TASKS_ON`` is set to True while the
        identities are processed and is always set back to False
        afterwards, even when one of the steps raises.
        """

        # ** START SYNC LOGIC **
        # Check that enrichment tasks are not active before loading identities
        while True:
            time.sleep(1)  # check each second if the task could start
            with TasksManager.IDENTITIES_TASKS_ON_LOCK:
                with TasksManager.NUMBER_ENRICH_TASKS_ON_LOCK:
                    enrich_tasks = TasksManager.NUMBER_ENRICH_TASKS_ON
                    logger.debug("[unify] Enrich tasks active: %i",
                                 enrich_tasks)
                    if enrich_tasks == 0:
                        # The load of identities can be started
                        TasksManager.IDENTITIES_TASKS_ON = True
                        break
        #  ** END SYNC LOGIC **

        try:
            self.__process_identities()
        finally:
            # Reset the flag on every exit path; leaving it set after an
            # error would keep reporting identities tasks as running
            with TasksManager.IDENTITIES_TASKS_ON_LOCK:
                TasksManager.IDENTITIES_TASKS_ON = False
Exemplo n.º 10
0
class TestBaseCase(unittest.TestCase):
    """Shared fixture for the ``Show`` command unit tests.

    ``setUp`` stores a country, two organizations, two identities with
    profiles and enrollments and one bare unique identity, then creates
    the command; ``tearDown`` calls ``clear()`` on the database handle.
    """

    def setUp(self):
        """Require buffered stdout, load the dataset, build the command."""
        stdout_is_buffered = hasattr(sys.stdout, 'getvalue')
        if not stdout_is_buffered:
            self.fail('This test needs to be run in buffered mode')

        # Handle the tests use to check the contents of the registry
        self.db = Database(DB_USER, DB_PASSWORD, DB_NAME, DB_HOST, DB_PORT)

        # Predefined dataset for testing
        self._load_test_dataset()

        # Command under test
        self.kwargs = dict(user=DB_USER,
                           password=DB_PASSWORD,
                           database=DB_NAME,
                           host=DB_HOST,
                           port=DB_PORT)
        self.cmd = Show(**self.kwargs)

    def tearDown(self):
        """Clear the database handle created in ``setUp``."""
        self.db.clear()

    def _load_test_dataset(self):
        """Store the country, organizations, identities and enrollments."""
        masked_email = '*****@*****.**'

        # Country
        with self.db.connect() as session:
            session.add(Country(code='US',
                                name='United States of America',
                                alpha3='USA'))

        # Organizations
        for organization in ('Example', 'Bitergia'):
            api.add_organization(self.db, organization)

        # John Smith: two identities, profile flagged as bot
        smith = api.add_identity(self.db, 'scm', masked_email,
                                 'John Smith', 'jsmith')
        api.add_identity(self.db, 'scm', masked_email, 'John Smith',
                         uuid=smith)
        api.edit_profile(self.db, smith, email=masked_email, is_bot=True)

        # Jane Roe: three identities, non-bot profile located in the US
        roe = api.add_identity(self.db, 'scm', masked_email,
                               'Jane Roe', 'jroe')
        for source in ('scm', 'unknown'):
            api.add_identity(self.db, source, masked_email, uuid=roe)
        api.edit_profile(self.db, roe,
                         name='Jane Roe',
                         email=masked_email,
                         is_bot=False,
                         country_code='US')

        # Unique identity that has neither identities nor enrollments
        api.add_unique_identity(self.db,
                                '0000000000000000000000000000000000000000')

        # Enrollments
        api.add_enrollment(self.db, smith, 'Example')
        api.add_enrollment(self.db, roe, 'Example')
        for first_year, last_year in ((1999, 2000), (2006, 2008)):
            api.add_enrollment(self.db, roe, 'Bitergia',
                               datetime.datetime(first_year, 1, 1),
                               datetime.datetime(last_year, 1, 1))
Exemplo n.º 11
0
class TaskIdentitiesMerge(Task):
    """ Task for processing identities in SortingHat

    Unify, affiliate, autoprofile and autogender are run through the
    ``sortinghat`` command line tool; bot flags are set with
    ``api.edit_profile``.
    """

    def __init__(self, conf):
        """Create the database handle and record the creation time.

        :param conf: configuration object forwarded to ``Task.__init__``
        """
        super().__init__(conf)

        self.sh_kwargs = {'user': self.db_user, 'password': self.db_password,
                          'database': self.db_sh, 'host': self.db_host,
                          'port': None}
        self.db = Database(**self.sh_kwargs)
        self.last_autorefresh = datetime.utcnow()  # Last autorefresh date

    def is_backend_task(self):
        """Return False: this task reports itself as not a backend task."""
        return False

    def __get_uuids_from_profile_name(self, profile_name):
        """ Get the uuids of all profiles whose name is profile_name """
        uuids = []

        with self.db.connect() as session:
            query = session.query(Profile).\
                filter(Profile.name == profile_name)
            uuids = [profile.uuid for profile in query.all()]
        return uuids

    def __build_sh_command(self):
        """Return the ``sortinghat`` argument list with connection options.

        NOTE(review): the database password is passed as a command line
        argument, so it is likely visible in the process list while the
        command runs. Left unchanged because it is how the tool is invoked
        here.
        """
        cfg = self.config.get_conf()

        db_user = cfg['sortinghat']['user']
        db_password = cfg['sortinghat']['password']
        db_host = cfg['sortinghat']['host']
        db_name = cfg['sortinghat']['database']
        cmd = ['sortinghat', '-u', db_user, '-p', db_password, '--host', db_host,
               '-d', db_name]

        return cmd

    def __execute_sh_command(self, cmd):
        """Run ``cmd``, log an error on failure and return its exit code."""
        logger.debug("Executing %s", cmd)
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        # The output is not used, but the pipe must be drained so the
        # child process can finish
        proc.communicate()
        return_code = proc.returncode
        if return_code != 0:
            logger.error("[sortinghat] Error in command %s", cmd)
        return return_code

    def do_affiliate(self):
        """Run ``sortinghat affiliate``; return None."""
        cmd = self.__build_sh_command()
        cmd += ['affiliate']
        self.__execute_sh_command(cmd)
        return

    def do_autogender(self):
        """Run ``sortinghat autogender``; return None."""
        cmd = self.__build_sh_command()
        cmd += ['autogender']
        self.__execute_sh_command(cmd)
        return None

    def do_autoprofile(self, sources):
        """Run ``sortinghat autoprofile`` for ``sources``; return None."""
        cmd = self.__build_sh_command()
        cmd += ['autoprofile'] + sources
        self.__execute_sh_command(cmd)
        return None

    def do_unify(self, kwargs):
        """Run ``sortinghat unify``; return None.

        :param kwargs: dict with the keys 'matching' (algorithm name) and
            'strict_mapping' (when false, '--no-strict-matching' is added)
        """
        cmd = self.__build_sh_command()
        cmd += ['unify', '--fast-matching', '-m', kwargs['matching']]
        if not kwargs['strict_mapping']:
            cmd += ['--no-strict-matching']
        self.__execute_sh_command(cmd)
        return

    def __set_bot_flag(self, names, is_bot):
        """Set ``is_bot`` on every profile whose name is in ``names``."""
        for name in names:
            # First we need the uuids for the profile name
            uuids = self.__get_uuids_from_profile_name(name)
            # Then we can modify the profile setting bot flag
            profile = {"is_bot": is_bot}
            for uuid in uuids:
                api.edit_profile(self.db, uuid, **profile)

    def __process_identities(self):
        """Run unify, affiliate, autoprofile, autogender and bot marking."""
        cfg = self.config.get_conf()

        for algo in cfg['sortinghat']['matching']:
            if not algo:
                # cfg['sortinghat']['matching'] is an empty list
                logger.debug('Unify not executed because empty algorithm')
                continue
            kwargs = {'matching': algo, 'fast_matching': True,
                      'strict_mapping': cfg['sortinghat']['strict_mapping']}
            logger.info("[sortinghat] Unifying identities using algorithm %s",
                        kwargs['matching'])
            self.do_unify(kwargs)

        if not cfg['sortinghat']['affiliate']:
            logger.debug("Not doing affiliation")
        else:
            # Global enrollments using domains
            logger.info("[sortinghat] Executing affiliate")
            self.do_affiliate()

        sources = None
        if 'autoprofile' in cfg['sortinghat']:
            sources = cfg['sortinghat']['autoprofile']
        # A missing, empty or ['']-style value all mean "not configured"
        if not sources or not sources[0]:
            logger.info("[sortinghat] Autoprofile not configured. Skipping.")
        else:
            logger.info("[sortinghat] Executing autoprofile for sources: %s",
                        sources)
            self.do_autoprofile(sources)

        if 'autogender' not in cfg['sortinghat'] or \
                not cfg['sortinghat']['autogender']:
            logger.info("[sortinghat] Autogender not configured. Skipping.")
        else:
            logger.info("[sortinghat] Executing autogender")
            self.do_autogender()

        if 'bots_names' not in cfg['sortinghat']:
            logger.info("[sortinghat] Bots name list not configured. Skipping.")
        else:
            logger.info("[sortinghat] Marking bots: %s",
                        cfg['sortinghat']['bots_names'])
            self.__set_bot_flag(cfg['sortinghat']['bots_names'], True)
            # For quitting the bot flag - debug feature
            if 'no_bots_names' in cfg['sortinghat']:
                logger.info("[sortinghat] Removing Marking bots: %s",
                            cfg['sortinghat']['no_bots_names'])
                self.__set_bot_flag(cfg['sortinghat']['no_bots_names'], False)

    def execute(self):
        """Wait until no enrich task is active, then process identities.

        ``TasksManager.IDENTITIES_TASKS_ON`` is set to True while the
        identities are processed and is always set back to False
        afterwards, even when one of the steps raises.
        """

        # ** START SYNC LOGIC **
        # Check that enrichment tasks are not active before loading identities
        while True:
            time.sleep(1)  # check each second if the task could start
            with TasksManager.IDENTITIES_TASKS_ON_LOCK:
                with TasksManager.NUMBER_ENRICH_TASKS_ON_LOCK:
                    enrich_tasks = TasksManager.NUMBER_ENRICH_TASKS_ON
                    logger.debug("[unify] Enrich tasks active: %i", enrich_tasks)
                    if enrich_tasks == 0:
                        # The load of identities can be started
                        TasksManager.IDENTITIES_TASKS_ON = True
                        break
        #  ** END SYNC LOGIC **

        try:
            self.__process_identities()
        finally:
            # Reset the flag on every exit path; leaving it set after an
            # error would keep reporting identities tasks as running
            with TasksManager.IDENTITIES_TASKS_ON_LOCK:
                TasksManager.IDENTITIES_TASKS_ON = False
Exemplo n.º 12
0
class TaskIdentitiesMerge(Task):
    """Task that unifies, affiliates and autocompletes Sorting Hat
    identities and flags bot profiles.

    Each stage can be switched off through the constructor flags; the
    stages that are enabled run in this order: unify, affiliate,
    autoprofile, bots.
    """
    def __init__(self,
                 conf,
                 load_orgs=True,
                 load_ids=True,
                 unify=True,
                 autoprofile=True,
                 affiliate=True,
                 bots=True):
        """Store the stage flags and open the Sorting Hat database.

        :param conf: task configuration, forwarded to the base class
        :param load_orgs: accepted for interface compatibility; it is not
            used by this class
        :param load_ids: load identities from raw index (stored only;
            `run` does not read it)
        :param unify: unify identities
        :param autoprofile: execute autoprofile
        :param affiliate: affiliate identities
        :param bots: mark bots in Sorting Hat
        """
        super().__init__(conf)

        self.load_ids = load_ids  # Load identities from raw index
        self.unify = unify  # Unify identities
        self.autoprofile = autoprofile  # Execute autoprofile
        self.affiliate = affiliate  # Affiliate identities
        self.bots = bots  # Mark bots in SH
        # NOTE(review): the db_* attributes are presumably set by
        # Task.__init__ from `conf` - confirm against the base class.
        self.sh_kwargs = {
            'user': self.db_user,
            'password': self.db_password,
            'database': self.db_sh,
            'host': self.db_host,
            'port': None
        }
        self.db = Database(**self.sh_kwargs)

    def is_backend_task(self):
        """Tell callers this task is not bound to a backend."""
        return False

    def __get_uuids_from_profile_name(self, profile_name):
        """ Get the uuids of every profile whose name is `profile_name`

        :param profile_name: exact profile name to look for
        :returns: list of uuids, empty when no profile matches
        """
        with self.db.connect() as session:
            query = session.query(Profile).\
                filter(Profile.name == profile_name)
            # Read the uuids while the session is still open
            uuids = [profile.uuid for profile in query.all()]
        return uuids

    def __set_bot_flag(self, profile_names, is_bot):
        """ Set the `is_bot` flag on every profile named in `profile_names` """
        for name in profile_names:
            # First we need the uuids for the profile name
            for uuid in self.__get_uuids_from_profile_name(name):
                # Then we can modify the profile setting bot flag
                api.edit_profile(self.db, uuid, is_bot=is_bot)

    def run(self):
        """Execute the enabled Sorting Hat stages.

        Failures reported by the Sorting Hat commands (a return code
        different from CMD_SUCCESS) are logged and do not stop the
        remaining stages.
        """
        if self.unify:
            for algo in self.conf['sh_matching']:
                unify_kwargs = {'matching': algo, 'fast_matching': True}
                logger.info(
                    "[sortinghat] Unifying identities using algorithm %s",
                    unify_kwargs['matching'])
                code = Unify(**self.sh_kwargs).unify(**unify_kwargs)
                if code != CMD_SUCCESS:
                    logger.error("[sortinghat] Error in unify %s",
                                 unify_kwargs)

        if self.affiliate:
            # Global enrollments using domains
            logger.info("[sortinghat] Executing affiliate")
            code = Affiliate(**self.sh_kwargs).affiliate()
            if code != CMD_SUCCESS:
                # affiliate() takes no arguments, so there is nothing else
                # to report (the unify kwargs may not even exist here).
                logger.error("[sortinghat] Error in affiliate")

        if self.autoprofile:
            if 'sh_autoprofile' not in self.conf:
                logger.info(
                    "[sortinghat] Autoprofile not configured. Skipping.")
            else:
                sources = self.conf['sh_autoprofile']
                logger.info("[sortinghat] Executing autoprofile: %s", sources)
                code = AutoProfile(**self.sh_kwargs).autocomplete(sources)
                if code != CMD_SUCCESS:
                    # Report the sources that were actually being processed
                    logger.error("Error in autoprofile %s", sources)

        if self.bots:
            if 'sh_bots_names' not in self.conf:
                logger.info(
                    "[sortinghat] Bots name list not configured. Skipping.")
            else:
                logger.info("[sortinghat] Marking bots: %s",
                            self.conf['sh_bots_names'])
                self.__set_bot_flag(self.conf['sh_bots_names'], True)
                # For quitting the bot flag - debug feature
                if 'sh_no_bots_names' in self.conf:
                    logger.info("[sortinghat] Removing Marking bots: %s",
                                self.conf['sh_no_bots_names'])
                    self.__set_bot_flag(self.conf['sh_no_bots_names'], False)
Exemplo n.º 13
0
class TestBaseCase(unittest.TestCase):
    """Defines common setup and teardown methods on export unit tests"""

    def setUp(self):
        """Load the test dataset, create the output file and the command."""
        import os
        import tempfile

        if not hasattr(sys.stdout, 'getvalue'):
            self.fail('This test needs to be run in buffered mode')

        # Create a connection to check the contents of the registry
        self.db = Database(DB_USER, DB_PASSWORD, DB_NAME, DB_HOST, DB_PORT)

        # Import predefined dataset for testing
        self._load_test_dataset()

        # Temporary file for outputs. mkstemp() hands back an already open
        # OS-level descriptor together with the path; only the path is
        # used, so close the descriptor instead of leaking one per test.
        fd, self.tmpfile = tempfile.mkstemp()
        os.close(fd)

        # Create command
        self.kwargs = {'user' : DB_USER,
                       'password' : DB_PASSWORD,
                       'database' : DB_NAME,
                       'host' : DB_HOST,
                       'port' : DB_PORT}
        self.cmd = Export(**self.kwargs)

    def tearDown(self):
        """Wipe the registry and delete the temporary output file."""
        import os

        self.db.clear()
        os.remove(self.tmpfile)

    def read_json(self, filename):
        """Return the object decoded from the UTF-8 JSON file `filename`."""
        if sys.version_info[0] >= 3: # Python 3
            with open(filename, 'r', encoding='UTF-8') as f:
                content = f.read()
        else: # Python 2
            with open(filename, 'r') as f:
                content = f.read().decode('UTF-8')

        return json.loads(content)

    def _load_test_dataset(self):
        """Reset the registry and fill it with the predefined dataset."""
        import datetime

        self.db.clear()

        # Add country
        with self.db.connect() as session:
            # Add a country
            us = Country(code='US', name='United States of America', alpha3='USA')
            session.add(us)

        # Add organizations
        api.add_organization(self.db, 'Example')
        api.add_domain(self.db, 'Example', 'example.com', is_top_domain=True)
        api.add_domain(self.db, 'Example', 'example.net', is_top_domain=True)

        api.add_organization(self.db, 'Bitergia')
        api.add_domain(self.db, 'Bitergia', 'bitergia.net', is_top_domain=True)
        api.add_domain(self.db, 'Bitergia', 'bitergia.com', is_top_domain=True)
        api.add_domain(self.db, 'Bitergia', 'api.bitergia.com', is_top_domain=False)
        api.add_domain(self.db, 'Bitergia', 'test.bitergia.com', is_top_domain=False)

        api.add_organization(self.db, 'Unknown')

        # Add John Smith identity
        jsmith_uuid = api.add_identity(self.db, 'scm', '*****@*****.**',
                                       'John Smith', 'jsmith')
        api.add_identity(self.db, 'scm', '*****@*****.**', 'John Smith',
                         uuid=jsmith_uuid)
        api.edit_profile(self.db, jsmith_uuid, email='*****@*****.**', is_bot=True)

        # Add Jane Roe identity
        jroe_uuid = api.add_identity(self.db, 'scm', '*****@*****.**',
                                     'Jane Roe', 'jroe')
        api.add_identity(self.db, 'scm', '*****@*****.**',
                         uuid=jroe_uuid)
        api.add_identity(self.db, 'unknown', '*****@*****.**',
                         uuid=jroe_uuid)
        api.edit_profile(self.db, jroe_uuid, name='Jane Roe', email='*****@*****.**',
                         is_bot=False, country_code='US')

        # Add unique identity, this one will have neither identities
        # nor enrollments
        api.add_unique_identity(self.db,
                                '0000000000000000000000000000000000000000')

        # Add enrollments
        api.add_enrollment(self.db, jsmith_uuid, 'Example')

        api.add_enrollment(self.db, jroe_uuid, 'Example')
        api.add_enrollment(self.db, jroe_uuid, 'Bitergia',
                           datetime.datetime(1999, 1, 1),
                           datetime.datetime(2000, 1, 1))
        api.add_enrollment(self.db, jroe_uuid, 'Bitergia',
                           datetime.datetime(2006, 1, 1),
                           datetime.datetime(2008, 1, 1))

        # Add blacklist
        api.add_to_matching_blacklist(self.db, '*****@*****.**')
        api.add_to_matching_blacklist(self.db, 'John Smith')
Exemplo n.º 14
0
class TestBaseCase(unittest.TestCase):
    """Shared fixture for the show command unit tests.

    Every test starts with a freshly loaded registry and a ready-to-use
    Show command; the registry is cleared again once the test finishes.
    """
    def setUp(self):
        """Check buffered mode, load the dataset and build the command."""
        stdout_is_buffered = hasattr(sys.stdout, 'getvalue')
        if not stdout_is_buffered:
            self.fail('This test needs to be run in buffered mode')

        # Connection used to inspect the contents of the registry
        self.db = Database(DB_USER, DB_PASSWORD, DB_NAME, DB_HOST, DB_PORT)

        # Predefined dataset the tests rely on
        self._load_test_dataset()

        # Command under test, connected to the same registry
        self.kwargs = dict(user=DB_USER,
                           password=DB_PASSWORD,
                           database=DB_NAME,
                           host=DB_HOST,
                           port=DB_PORT)
        self.cmd = Show(**self.kwargs)

    def tearDown(self):
        """Remove everything the test left in the registry."""
        self.db.clear()

    def _load_test_dataset(self):
        """Insert one country, two organizations, three unique identities
        and their enrollments."""
        # Country referenced by Jane Roe's profile
        with self.db.connect() as session:
            session.add(
                Country(code='US',
                        name='United States of America',
                        alpha3='USA'))

        # Organizations
        for org_name in ('Example', 'Bitergia'):
            api.add_organization(self.db, org_name)

        # John Smith: two identities and a bot profile
        smith = api.add_identity(self.db, 'scm', '*****@*****.**',
                                 'John Smith', 'jsmith')
        api.add_identity(self.db, 'scm', '*****@*****.**', 'John Smith',
                         uuid=smith)
        api.edit_profile(self.db, smith, email='*****@*****.**', is_bot=True)

        # Jane Roe: three identities and a complete profile
        roe = api.add_identity(self.db, 'scm', '*****@*****.**',
                               'Jane Roe', 'jroe')
        api.add_identity(self.db, 'scm', '*****@*****.**', uuid=roe)
        api.add_identity(self.db, 'unknown', '*****@*****.**', uuid=roe)
        api.edit_profile(self.db, roe,
                         name='Jane Roe',
                         email='*****@*****.**',
                         is_bot=False,
                         country_code='US')

        # Unique identity with neither identities nor enrollments
        api.add_unique_identity(self.db,
                                '0000000000000000000000000000000000000000')

        # Enrollments: both people in Example, Jane Roe twice in Bitergia
        api.add_enrollment(self.db, smith, 'Example')
        api.add_enrollment(self.db, roe, 'Example')

        bitergia_periods = (
            (datetime.datetime(1999, 1, 1), datetime.datetime(2000, 1, 1)),
            (datetime.datetime(2006, 1, 1), datetime.datetime(2008, 1, 1)),
        )
        for start, end in bitergia_periods:
            api.add_enrollment(self.db, roe, 'Bitergia', start, end)
Exemplo n.º 15
0
class TestBaseCase(unittest.TestCase):
    """Shared fixture for the unit tests that drive the Merge command.

    The registry is emptied and reloaded before each test and emptied
    again afterwards.
    """
    def setUp(self):
        """Check buffered mode, reset the registry and build the command."""
        buffered = (hasattr(sys.stdout, 'getvalue')
                    or hasattr(sys.stderr, 'getvalue'))
        if not buffered:
            self.fail('This test needs to be run in buffered mode')

        # Connection used to inspect the contents of the registry
        self.db = Database(DB_USER, DB_PASSWORD, DB_NAME, DB_HOST, DB_PORT)

        # Start from an empty registry before loading the dataset
        self.db.clear()
        self._load_test_dataset()

        # Command under test, connected to the same registry
        self.kwargs = dict(user=DB_USER,
                           password=DB_PASSWORD,
                           database=DB_NAME,
                           host=DB_HOST,
                           port=DB_PORT)
        self.cmd = Merge(**self.kwargs)

    def tearDown(self):
        """Remove everything the test left in the registry."""
        self.db.clear()

    def _load_test_dataset(self):
        """Insert one country, two unique identities, three organizations
        and the enrollments linking them."""
        smith = 'John Smith'
        doe = 'John Doe'

        # Country referenced by John Doe's profile
        with self.db.connect() as session:
            session.add(
                Country(code='US',
                        name='United States of America',
                        alpha3='USA'))

        # John Smith: two identities, not a bot
        api.add_unique_identity(self.db, smith)
        api.add_identity(self.db, 'scm', '*****@*****.**', uuid=smith)
        api.add_identity(self.db, 'scm', '*****@*****.**', 'John Smith',
                         uuid=smith)
        api.edit_profile(self.db, smith, name='John Smith', is_bot=False)

        # John Doe: one identity, flagged as bot
        api.add_unique_identity(self.db, doe)
        api.add_identity(self.db, 'scm', '*****@*****.**', uuid=doe)
        api.edit_profile(self.db, doe,
                         email='*****@*****.**',
                         is_bot=True,
                         country_code='US')

        # Both are enrolled in Example with the default period
        api.add_organization(self.db, 'Example')
        for person in (smith, doe):
            api.add_enrollment(self.db, person, 'Example')

        # Bitergia: default period for Smith, explicit one for Doe
        api.add_organization(self.db, 'Bitergia')
        api.add_enrollment(self.db, smith, 'Bitergia')
        doe_start = datetime.datetime(1999, 1, 1)
        doe_end = datetime.datetime(2000, 1, 1)
        api.add_enrollment(self.db, doe, 'Bitergia', doe_start, doe_end)

        # Organization without enrollments
        api.add_organization(self.db, 'LibreSoft')
def main():
    """ Read emails and look for uuids.

    Builds an email -> uuid map from the Sorting Hat identities whose
    unique identity has at least one 'git' identity, looks up every email
    returned by `read_emails(args.input)` in that map and writes the
    matches to `args.output` as a CSV file with `email` and `uuid` columns.
    Database settings are read from the `SortingHat` section of the
    `.settings` file in the current directory.
    """

    # Parse args
    args = parse_args()

    # Read config file
    parser = configparser.ConfigParser()
    parser.read('.settings')
    section = parser['SortingHat']
    db_user = section['db_user']
    db_password = section['password']
    db_name = section['db_name']
    db_host = section['host']
    db_port = section['port']

    db = Database(db_user, db_password, db_name, db_host, db_port)

    # Get email blacklist from SH. Kept as a set because it is probed once
    # per identity in the loop below.
    print('Reading email blacklist from SH')
    email_blacklist = {entry.excluded
                       for entry in sortinghat.api.blacklist(db)}

    # Placeholder values that do not identify anybody
    placeholder_emails = {'none@none', 'unknown'}

    with db.connect() as session:
        print('Searching for E-Mails in SH...')
        query = session.query(UniqueIdentity)
        query = query.filter(Identity.source == 'git',
                             UniqueIdentity.uuid == Identity.uuid)
        uidentities = query.order_by(UniqueIdentity.uuid).all()

        print(len(uidentities), ' entities read from SH')

        print('Creating E-Mails dict...')
        email_dict = {}
        dups = 0
        for uidentity in uidentities:
            for identity in uidentity.identities:
                email = identity.email

                # Skip missing, empty, placeholder and blacklisted emails
                if not email or email in placeholder_emails \
                        or email in email_blacklist:
                    continue

                # The same email attached to a different uuid is counted
                # as a duplicate; the last uuid seen wins.
                if email in email_dict and email_dict[email] != identity.uuid:
                    dups += 1

                email_dict[email] = identity.uuid

        print('Done! Entities in emails dict: ', len(email_dict), ' Dups: ',
              dups)

    # Materialize the input so it can be both counted and iterated,
    # whatever kind of iterable read_emails() returns.
    email_list = list(read_emails(args.input))
    print(len(email_list), ' emails read from file')

    # Find UUIDS
    matches = {}
    uuids = set()
    dups_in_csv = 0
    not_found_count = 0
    for email in email_list:
        if email in matches:
            dups_in_csv += 1
        elif email in email_dict:
            matches[email] = email_dict[email]
            uuids.add(email_dict[email])
        else:
            not_found_count += 1

    print('dups in csv:', dups_in_csv)
    print('Not found:', not_found_count)
    print('Found         : ', len(matches))
    print('Found (unique): ', len(uuids))

    # Export results
    print('Writing results...')
    fieldnames = ['email', 'uuid']
    # newline='' lets the csv module control line endings itself; without
    # it an extra blank line follows every row on Windows.
    with open(args.output, 'w', newline='') as csv_out:
        csvwriter = csv.DictWriter(csv_out,
                                   delimiter=',',
                                   fieldnames=fieldnames)
        csvwriter.writeheader()
        csvwriter.writerows({'email': email, 'uuid': uuid}
                            for email, uuid in matches.items())

    print('Results wrote to file ', args.output)