Example #1
    def _test_refresh_identities(self):
        """Test refresh identities"""

        # populate raw index
        perceval_backend = None
        clean = True
        ocean_backend = self.connectors[self.connector][1](perceval_backend)
        elastic_ocean = get_elastic(self.es_con, self.ocean_index, clean,
                                    ocean_backend)
        ocean_backend.set_elastic(elastic_ocean)
        data2es(self.items, ocean_backend)

        # populate enriched index
        enrich_backend = self.connectors[self.connector][2]()
        load_identities(ocean_backend, enrich_backend)
        enrich_backend = self.connectors[self.connector][2](
            db_sortinghat=DB_SORTINGHAT,
            db_user=self.db_user,
            db_password=self.db_password)
        elastic_enrich = get_elastic(self.es_con, self.enrich_index, clean,
                                     enrich_backend)
        enrich_backend.set_elastic(elastic_enrich)
        enrich_backend.enrich_items(ocean_backend)

        total = refresh_identities(enrich_backend)
        return total
Example #2
    def _test_raw_to_enrich(self, sortinghat=False, projects=False):
        """Test whether raw indexes are properly enriched"""

        # populate raw index
        perceval_backend = None
        clean = True
        ocean_backend = self.connectors[self.connector][1](perceval_backend)
        elastic_ocean = get_elastic(self.es_con, self.ocean_index, clean, ocean_backend)
        ocean_backend.set_elastic(elastic_ocean)
        data2es(self.items, ocean_backend)

        # populate enriched index
        if not sortinghat and not projects:
            enrich_backend = self.connectors[self.connector][2]()
        elif sortinghat and not projects:
            enrich_backend = self.connectors[self.connector][2](db_sortinghat=DB_SORTINGHAT,
                                                                db_user=self.db_user,
                                                                db_password=self.db_password)
        elif not sortinghat and projects:
            enrich_backend = self.connectors[self.connector][2](json_projects_map=FILE_PROJECTS,
                                                                db_user=self.db_user,
                                                                db_password=self.db_password)

        elastic_enrich = get_elastic(self.es_con, self.enrich_index, clean, enrich_backend)
        enrich_backend.set_elastic(elastic_enrich)

        # Load SH identities
        if sortinghat:
            load_identities(ocean_backend, enrich_backend)

        raw_count = len([item for item in ocean_backend.fetch()])
        enrich_count = enrich_backend.enrich_items(ocean_backend)
        # self._test_csv_mappings(sortinghat)

        return {'raw': raw_count, 'enrich': enrich_count}
Example #3
    def _test_raw_to_enrich_anonymized(self, sortinghat=False, projects=False):
        """Test whether raw indexes are properly enriched"""

        # populate raw index
        perceval_backend = None
        clean = True
        self.ocean_backend = self.connectors[self.connector][1](
            perceval_backend, anonymize=True)
        elastic_ocean = get_elastic(self.es_con, self.ocean_index_anonymized,
                                    clean, self.ocean_backend)
        self.ocean_backend.set_elastic(elastic_ocean)
        data2es(self.items, self.ocean_backend)

        # populate enriched index
        self.enrich_backend = self.connectors[self.connector][2]()

        elastic_enrich = get_elastic(self.es_con, self.enrich_index_anonymized,
                                     clean, self.enrich_backend,
                                     self.enrich_aliases)
        self.enrich_backend.set_elastic(elastic_enrich)

        raw_count = len([item for item in self.ocean_backend.fetch()])
        enrich_count = self.enrich_backend.enrich_items(self.ocean_backend)

        return {'raw': raw_count, 'enrich': enrich_count}
Example #4
    def test_enrich(self, sortinghat=False, projects=False):
        """Test enrich all sources"""
        config = configparser.ConfigParser()
        config.read(CONFIG_FILE)
        es_con = dict(config.items('ElasticSearch'))['url']
        logging.info("Enriching data in: %s", es_con)
        connectors = get_connectors()
        for con in sorted(connectors.keys()):
            perceval_backend = None
            ocean_index = "test_" + con
            enrich_index = "test_" + con + "_enrich"
            clean = False
            ocean_backend = connectors[con][1](perceval_backend)
            elastic_ocean = get_elastic(es_con, ocean_index, clean,
                                        ocean_backend)
            ocean_backend.set_elastic(elastic_ocean)
            clean = True
            if not sortinghat and not projects:
                enrich_backend = connectors[con][2]()
            elif sortinghat and not projects:
                enrich_backend = connectors[con][2](
                    db_sortinghat=DB_SORTINGHAT)
            elif not sortinghat and projects:
                enrich_backend = connectors[con][2](
                    db_projects_map=DB_PROJECTS)
            elastic_enrich = get_elastic(es_con, enrich_index, clean,
                                         enrich_backend)
            enrich_backend.set_elastic(elastic_enrich)
            if sortinghat:
                # Load SH identities
                load_identities(ocean_backend, enrich_backend)
            enrich_count = enrich_backend.enrich_items(ocean_backend)

            if enrich_count is not None:
                logging.info("Total items enriched %i ", enrich_count)
Example #5
    def _test_refresh_project(self):
        """Test refresh project field"""

        # populate raw index
        perceval_backend = None
        clean = True
        ocean_backend = self.connectors[self.connector][1](perceval_backend)
        elastic_ocean = get_elastic(self.es_con, self.ocean_index, clean,
                                    ocean_backend)
        ocean_backend.set_elastic(elastic_ocean)
        data2es(self.items, ocean_backend)

        # populate enriched index
        enrich_backend = self.connectors[self.connector][2](
            db_projects_map=DB_PROJECTS,
            db_user=self.db_user,
            db_password=self.db_password)

        elastic_enrich = get_elastic(self.es_con, self.enrich_index, clean,
                                     enrich_backend)
        enrich_backend.set_elastic(elastic_enrich)
        enrich_backend.enrich_items(ocean_backend)

        total = refresh_projects(enrich_backend)
        return total
Example #6
    def _test_study(self, test_study):
        """Test the execution of a study"""

        # populate raw index
        perceval_backend = None
        clean = True
        ocean_backend = self.connectors[self.connector][1](perceval_backend)
        elastic_ocean = get_elastic(self.es_con, self.ocean_index, clean, ocean_backend)
        ocean_backend.set_elastic(elastic_ocean)
        data2es(self.items, ocean_backend)

        # populate enriched index
        enrich_backend = self.connectors[self.connector][2](db_sortinghat=DB_SORTINGHAT,
                                                            db_user=self.db_user,
                                                            db_password=self.db_password)

        elastic_enrich = get_elastic(self.es_con, self.enrich_index, clean, enrich_backend)
        enrich_backend.set_elastic(elastic_enrich)
        enrich_backend.enrich_items(ocean_backend)

        for study in enrich_backend.studies:
            if test_study == study.__name__:
                found = (study, ocean_backend, enrich_backend)

        return found
Example #7
    def _get_enrich_backend(self):
        db_projects_map = None
        json_projects_map = None
        clean = False
        connector = get_connector_from_name(
            self.get_backend(self.backend_section))

        if 'projects_file' in self.conf['projects']:
            json_projects_map = self.conf['projects']['projects_file']

        enrich_backend = connector[2](self.db_sh, db_projects_map,
                                      json_projects_map, self.db_user,
                                      self.db_password, self.db_host)
        elastic_enrich = get_elastic(
            self.conf['es_enrichment']['url'],
            self.conf[self.backend_section]['enriched_index'], clean,
            enrich_backend)
        enrich_backend.set_elastic(elastic_enrich)

        if 'github' in self.conf.keys() and \
            'backend_token' in self.conf['github'].keys() and \
            self.get_backend(self.backend_section) == "git":

            gh_token = self.conf['github']['backend_token']
            enrich_backend.set_github_token(gh_token)

        if 'unaffiliated_group' in self.conf['sortinghat']:
            enrich_backend.unaffiliated_group = self.conf['sortinghat'][
                'unaffiliated_group']

        return enrich_backend
Example #8
    def test_add_alias(self):
        """Test whether add_alias properly works"""

        config = configparser.ConfigParser()
        config.read(CONFIG_FILE)
        es_con = dict(config.items('ElasticSearch'))['url']

        tmp_index = "test-add-aliases"
        tmp_index_url = es_con + "/" + tmp_index

        enrich_backend = get_connectors()["git"][2]()
        elastic_enrich = get_elastic(es_con, tmp_index, True, enrich_backend)
        self._enrich.set_elastic(elastic_enrich)

        # add alias
        with self.assertLogs(logger, level='INFO') as cm:
            self._enrich.elastic.add_alias(DEMOGRAPHICS_ALIAS)

        self.assertEqual(cm.output[0],
                         'INFO:grimoire_elk.elastic:Alias %s created on %s.'
                         % (DEMOGRAPHICS_ALIAS, anonymize_url(tmp_index_url)))

        r = self._enrich.requests.get(self._enrich.elastic.index_url + "/_alias", headers=HEADER_JSON, verify=False)
        self.assertIn(DEMOGRAPHICS_ALIAS, r.json()[self._enrich.elastic.index]['aliases'])

        # add alias again
        with self.assertLogs(logger, level='DEBUG') as cm:
            self._enrich.elastic.add_alias(DEMOGRAPHICS_ALIAS)

        self.assertEqual(cm.output[0],
                         'DEBUG:grimoire_elk.elastic:Alias %s already exists on %s.'
                         % (DEMOGRAPHICS_ALIAS, anonymize_url(tmp_index_url)))

        requests.delete(tmp_index_url, verify=False)
Example #9
    def _get_ocean_backend(self, enrich_backend):
        backend_cmd = None

        no_incremental = False
        clean = False

        from .task_projects import TaskProjects
        repos = TaskProjects.get_repos_by_backend_section(self.backend_section)
        if len(repos) == 1:
            # Support for filter raw when we have one repo
            (filter_raw, filters_raw_prefix) = self.__filters_raw(repos[0])
            if filter_raw or filters_raw_prefix:
                logger.info("Using %s %s for getting identities from raw",
                            filter_raw, filters_raw_prefix)
            ocean_backend = get_ocean_backend(backend_cmd, enrich_backend,
                                              no_incremental, filter_raw,
                                              filters_raw_prefix)
        else:
            ocean_backend = get_ocean_backend(backend_cmd, enrich_backend,
                                              no_incremental)

        elastic_ocean = get_elastic(
            self._get_collection_url(),
            self.conf[self.backend_section]['raw_index'], clean, ocean_backend)
        ocean_backend.set_elastic(elastic_ocean)

        return ocean_backend
Example #10
    def _get_ocean_backend(self, enrich_backend):
        backend_cmd = None

        no_incremental = False
        clean = False

        from .task_projects import TaskProjects
        repos = TaskProjects.get_repos_by_backend_section(self.backend_section)
        if len(repos) == 1:
            # Support for filter raw when we have one repo
            repo = repos[0]
            repo, repo_labels = self._extract_repo_labels(self.backend_section, repo)
            p2o_args = self._compose_p2o_params(self.backend_section, repo)
            filter_raw = p2o_args['filter-raw'] if 'filter-raw' in p2o_args else None

            ocean_backend = get_ocean_backend(backend_cmd, enrich_backend, no_incremental, filter_raw)
        else:
            ocean_backend = get_ocean_backend(backend_cmd, enrich_backend, no_incremental)

        elastic_ocean = get_elastic(self._get_collection_url(),
                                    self.conf[self.backend_section]['raw_index'],
                                    clean, ocean_backend)
        ocean_backend.set_elastic(elastic_ocean)

        return ocean_backend
Example #11
    def __autorefresh_studies(self, cfg):
        """Execute autorefresh for areas of code study if configured"""

        if 'studies' not in self.conf[self.backend_section] or \
                'enrich_areas_of_code:git' not in self.conf[self.backend_section]['studies']:
            logger.debug("Not doing autorefresh for studies, Areas of Code study is not active.")
            return

        aoc_index = self.conf['enrich_areas_of_code:git'].get('out_index', GitEnrich.GIT_AOC_ENRICHED)

        # if `out_index` exists but has no value, use default
        if not aoc_index:
            aoc_index = GitEnrich.GIT_AOC_ENRICHED

        logger.debug("Autorefresh for Areas of Code study index: %s", aoc_index)

        es = Elasticsearch([self.conf['es_enrichment']['url']], timeout=100,
                           verify_certs=self._get_enrich_backend().elastic.requests.verify)

        if not es.indices.exists(index=aoc_index):
            logger.debug("Not doing autorefresh, index doesn't exist for Areas of Code study")
            return

        logger.debug("Doing autorefresh for Areas of Code study")

        # Create a GitEnrich backend tweaked to work with AOC index
        aoc_backend = GitEnrich(self.db_sh, None, cfg['projects']['projects_file'],
                                self.db_user, self.db_password, self.db_host)
        aoc_backend.mapping = None
        aoc_backend.roles = ['author']
        elastic_enrich = get_elastic(self.conf['es_enrichment']['url'],
                                     aoc_index, clean=False, backend=aoc_backend)
        aoc_backend.set_elastic(elastic_enrich)

        self.__autorefresh(aoc_backend, studies=True)
Example #12
    def test_refresh_project(self):
        """Test refresh project field for all sources"""
        # self.test_enrich_sh() # Load the identities in ES
        config = configparser.ConfigParser()
        config.read(CONFIG_FILE)
        es_con = dict(config.items('ElasticSearch'))['url']
        db_user = ''
        db_password = ''
        if 'Database' in config:
            if 'user' in config['Database']:
                db_user = config['Database']['user']
            if 'password' in config['Database']:
                db_password = config['Database']['password']

        logging.info("Refreshing data in: %s", es_con)
        connectors = get_connectors()
        for con in sorted(connectors.keys()):
            enrich_index = "test_" + con + "_enrich"
            enrich_backend = connectors[con][2](db_projects_map=DB_PROJECTS,
                                                db_user=db_user,
                                                db_password=db_password)
            clean = False
            elastic_enrich = get_elastic(es_con, enrich_index, clean,
                                         enrich_backend)
            enrich_backend.set_elastic(elastic_enrich)
            logging.info("Refreshing projects fields in enriched index %s",
                         elastic_enrich.index_url)
            self.__refresh_projects(enrich_backend)
Example #13
    def test_enrich(self, sortinghat=False, projects=False):
        """Test enrich all sources"""
        config = configparser.ConfigParser()
        config.read(CONFIG_FILE)
        es_con = dict(config.items('ElasticSearch'))['url']
        db_user = ''
        db_password = ''
        if 'Database' in config:
            if 'user' in config['Database']:
                db_user = config['Database']['user']
            if 'password' in config['Database']:
                db_password = config['Database']['password']
        logging.info("Enriching data in: %s", es_con)
        connectors = get_connectors()
        for con in sorted(connectors.keys()):
            perceval_backend = None
            ocean_index = "test_" + con
            enrich_index = "test_" + con + "_enrich"
            clean = False
            ocean_backend = connectors[con][1](perceval_backend)
            elastic_ocean = get_elastic(es_con, ocean_index, clean,
                                        ocean_backend)
            ocean_backend.set_elastic(elastic_ocean)
            clean = True
            if not sortinghat and not projects:
                enrich_backend = connectors[con][2]()
            elif sortinghat and not projects:
                enrich_backend = connectors[con][2](
                    db_sortinghat=DB_SORTINGHAT,
                    db_user=db_user,
                    db_password=db_password)
            elif not sortinghat and projects:
                enrich_backend = connectors[con][2](
                    db_projects_map=DB_PROJECTS,
                    db_user=db_user,
                    db_password=db_password)
            elastic_enrich = get_elastic(es_con, enrich_index, clean,
                                         enrich_backend)
            enrich_backend.set_elastic(elastic_enrich)
            if sortinghat:
                # Load SH identities
                load_identities(ocean_backend, enrich_backend)
            raw_count = len([item for item in ocean_backend.fetch()])
            enrich_count = enrich_backend.enrich_items(ocean_backend)

            self.assertEqual(raw_count, enrich_count)
Example #14
    def _test_study(self,
                    test_study,
                    projects_json_repo=None,
                    projects_json=None,
                    prjs_map=None):
        """Test the execution of a study"""

        # populate raw index
        perceval_backend = None
        clean = True
        self.ocean_backend = self.connectors[self.connector][1](
            perceval_backend)
        elastic_ocean = get_elastic(self.es_con, self.ocean_index, clean,
                                    self.ocean_backend)
        self.ocean_backend.set_elastic(elastic_ocean)
        data2es(self.items, self.ocean_backend)

        # populate enriched index
        self.enrich_backend = self.connectors[self.connector][2](
            db_sortinghat=DB_SORTINGHAT,
            db_user=self.db_user,
            db_password=self.db_password,
            db_projects_map=self.db_projects_map)

        elastic_enrich = get_elastic(self.es_con, self.enrich_index, clean,
                                     self.enrich_backend)
        self.enrich_backend.set_elastic(elastic_enrich)

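        # Override the project mapping attributes when the test provides them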
        if projects_json:
            self.enrich_backend.json_projects = projects_json

        if projects_json_repo:
            self.enrich_backend.projects_json_repo = projects_json_repo

        if prjs_map:
            self.enrich_backend.prjs_map = prjs_map

        self.enrich_backend.enrich_items(self.ocean_backend)

        for study in self.enrich_backend.studies:
            if test_study == study.__name__:
                found = (study, self.ocean_backend, self.enrich_backend)
                break

        return found
Example #15
def get_perceval_params(url, index):
    logging.info("Get perceval params for index: %s" % (index))
    elastic = get_elastic(url, ConfOcean.get_index())
    ConfOcean.set_elastic(elastic)

    r = requests.get(elastic.index_url+"/repos/"+index)

    params = r.json()['_source']['params']

    return params
Example #16
def get_perceval_params(url, index):
    logging.info("Get perceval params for index: %s" % (index))
    elastic = get_elastic(url, ConfOcean.get_index())
    ConfOcean.set_elastic(elastic)

    r = requests.get(elastic.index_url + "/repos/" + index)

    params = r.json()['_source']['params']

    return params
Example #17
    def _test_raw_to_enrich(self, sortinghat=False, projects=False, pair_programming=False):
        """Test whether raw indexes are properly enriched"""

        # populate raw index
        perceval_backend = None
        clean = True
        self.ocean_backend = self.connectors[self.connector][1](perceval_backend)
        elastic_ocean = get_elastic(self.es_con, self.ocean_index, clean, self.ocean_backend)
        self.ocean_backend.set_elastic(elastic_ocean)
        data2es(self.items, self.ocean_backend)

        # populate enriched index
        if not sortinghat and not projects:
            self.enrich_backend = self.connectors[self.connector][2]()
        elif sortinghat and not projects:
            self.enrich_backend = self.connectors[self.connector][2](db_sortinghat=DB_SORTINGHAT,
                                                                     db_user=self.db_user,
                                                                     db_password=self.db_password)
        elif not sortinghat and projects:
            self.enrich_backend = self.connectors[self.connector][2](json_projects_map=FILE_PROJECTS,
                                                                     db_user=self.db_user,
                                                                     db_password=self.db_password)
        if pair_programming:
            self.enrich_backend.pair_programming = pair_programming

        elastic_enrich = get_elastic(self.es_con, self.enrich_index, clean, self.enrich_backend, self.enrich_aliases)
        self.enrich_backend.set_elastic(elastic_enrich)

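        # Stub categories and categories_tree so enrichment uses fixed values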
        categories = {1: 'General', 6: 'Technical', 2: 'Ecosystem', 3: 'Staff'}
        self.enrich_backend.categories = MagicMock(return_value=categories)

        categories_tree = {1: {}, 6: {}, 2: {}, 3: {}}
        self.enrich_backend.categories_tree = MagicMock(return_value=categories_tree)

        # Load SH identities
        if sortinghat:
            load_identities(self.ocean_backend, self.enrich_backend)

        raw_count = len([item for item in self.ocean_backend.fetch()])
        enrich_count = self.enrich_backend.enrich_items(self.ocean_backend)
        # self._test_csv_mappings(sortinghat)

        return {'raw': raw_count, 'enrich': enrich_count}
Example #18
    def _test_items_to_raw_anonymized(self):
        clean = True
        perceval_backend = None
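        # populate the anonymized raw index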
        self.ocean_backend = self.connectors[self.connector][1](perceval_backend, anonymize=True)
        elastic_ocean = get_elastic(self.es_con, self.ocean_index_anonymized, clean, self.ocean_backend,
                                    self.ocean_aliases)
        self.ocean_backend.set_elastic(elastic_ocean)

        raw_items = data2es(self.items, self.ocean_backend)

        return {'items': len(self.items), 'raw': raw_items}
Example #19
    def test_refresh_project(self):
        """Test refresh project field for all sources"""

        # populate raw index
        perceval_backend = Discourse('https://example.com', api_token='1234', api_username='******')
        clean = True
        self.ocean_backend = self.connectors[self.connector][1](perceval_backend)
        elastic_ocean = get_elastic(self.es_con, self.ocean_index, clean, self.ocean_backend)
        self.ocean_backend.set_elastic(elastic_ocean)
        data2es(self.items, self.ocean_backend)

        # populate enriched index
        self.enrich_backend = self.connectors[self.connector][2](db_projects_map=DB_PROJECTS,
                                                                 db_user=self.db_user,
                                                                 db_password=self.db_password)

        elastic_enrich = get_elastic(self.es_con, self.enrich_index, clean, self.enrich_backend)
        self.enrich_backend.set_elastic(elastic_enrich)
        self.enrich_backend.enrich_items(self.ocean_backend)

        total = refresh_projects(self.enrich_backend)
Example #20
    def _test_items_to_raw(self):
        """Test whether fetched items are properly loaded to ES"""

        clean = True
        perceval_backend = None
        ocean_backend = self.connectors[self.connector][1](perceval_backend)
        elastic_ocean = get_elastic(self.es_con, self.ocean_index, clean, ocean_backend)
        ocean_backend.set_elastic(elastic_ocean)

        raw_items = data2es(self.items, ocean_backend)

        return {'items': len(self.items), 'raw': raw_items}
Example #21
    def collect_arthur_items(repo):
        aitems = self.__feed_backend_arthur(repo)
        if not aitems:
            return
        connector = get_connector_from_name(self.backend_section)
        klass = connector[1]  # Ocean backend for the connector
        ocean_backend = klass(None)
        es_col_url = self._get_collection_url()
        es_index = self.conf[self.backend_section]['raw_index']
        clean = False
        elastic_ocean = get_elastic(es_col_url, es_index, clean, ocean_backend)
        ocean_backend.set_elastic(elastic_ocean)
        ocean_backend.feed(arthur_items=aitems)
Example #22
    def test_refresh_identities(self):
        """Test refresh identities"""

        # populate raw index
        perceval_backend = Discourse('https://example.com', api_token='1234', api_username='******')
        clean = True
        self.ocean_backend = self.connectors[self.connector][1](perceval_backend)
        elastic_ocean = get_elastic(self.es_con, self.ocean_index, clean, self.ocean_backend)
        self.ocean_backend.set_elastic(elastic_ocean)
        data2es(self.items, self.ocean_backend)

        # populate enriched index
        self.enrich_backend = self.connectors[self.connector][2]()
        load_identities(self.ocean_backend, self.enrich_backend)
        self.enrich_backend = self.connectors[self.connector][2](db_sortinghat=DB_SORTINGHAT,
                                                                 db_user=self.db_user,
                                                                 db_password=self.db_password)
        elastic_enrich = get_elastic(self.es_con, self.enrich_index, clean, self.enrich_backend)
        self.enrich_backend.set_elastic(elastic_enrich)
        self.enrich_backend.enrich_items(self.ocean_backend)

        total = refresh_identities(self.enrich_backend)
Example #23
    def _get_ocean_backend(self, enrich_backend):
        backend_cmd = None

        no_incremental = False
        clean = False

        ocean_backend = get_ocean_backend(backend_cmd, enrich_backend, no_incremental)
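        # Attach the configured raw index in ElasticSearch to the Ocean backend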
        elastic_ocean = get_elastic(self._get_collection_url(),
                                    self.conf[self.backend_name]['raw_index'],
                                    clean, ocean_backend)
        ocean_backend.set_elastic(elastic_ocean)

        return ocean_backend
Example #24
def feed_backends(url, clean, debug=False, redis=None):
    ''' Update Ocean for all existing backends '''

    logging.info("Updating all Ocean")
    elastic = get_elastic(url, ConfOcean.get_index(), clean)
    ConfOcean.set_elastic(elastic)
    fetch_cache = False

    # note: current rq versions use 'is_async' ('async' became a reserved word in Python 3.7)
    q = Queue('update', connection=Redis(redis), is_async=async_)

    for repo in ConfOcean.get_repos():
        task_feed = q.enqueue(feed_backend, url, clean, fetch_cache,
                              repo['backend_name'], repo['backend_params'],
                              repo['index'], repo['index_enrich'],
                              repo['project'])
        logging.info("Queued job")
        logging.info(task_feed)
Example #25
    def test_data_load(self):
        """Test load all sources JSON data into ES"""
        config = configparser.ConfigParser()
        config.read(CONFIG_FILE)
        es_con = dict(config.items('ElasticSearch'))['url']
        logging.info("Loading data in: %s", es_con)
        connectors = get_connectors()
        for con in sorted(connectors.keys()):
            with open(os.path.join("data", con + ".json")) as f:
                items = json.load(f)
                es_index = "test_" + con
                clean = True
                perceval_backend = None
                ocean_backend = connectors[con][1](perceval_backend)
                elastic_ocean = get_elastic(es_con, es_index, clean, ocean_backend)
                ocean_backend.set_elastic(elastic_ocean)
                self.__data2es(items, ocean_backend)
Example #26
    def test_refresh_identities(self):
        """Test refresh identities for all sources"""
        # self.test_enrich_sh() # Load the identities in ES
        config = configparser.ConfigParser()
        config.read(CONFIG_FILE)
        es_con = dict(config.items('ElasticSearch'))['url']
        logging.info("Refreshing data in: %s", es_con)
        connectors = get_connectors()
        for con in sorted(connectors.keys()):
            enrich_index = "test_" + con + "_enrich"
            enrich_backend = connectors[con][2](db_sortinghat=DB_SORTINGHAT)
            clean = False
            elastic_enrich = get_elastic(es_con, enrich_index, clean, enrich_backend)
            enrich_backend.set_elastic(elastic_enrich)
            logging.info("Refreshing identities fields in enriched index %s",
                         elastic_enrich.index_url)
            self.__refresh_identities(enrich_backend)
Example #27
    def test_data_load_error(self):
        """Test whether an exception is thrown when inserting data intO"""

        config = configparser.ConfigParser()
        config.read(CONFIG_FILE)
        es_con = dict(config.items('ElasticSearch'))['url']
        logging.info("Loading data in: %s", es_con)
        connector = get_connectors()['functest']
        with open(os.path.join("data", "functest_wrong.json")) as f:
            items = json.load(f)
            es_index = "test_functest"
            clean = True
            perceval_backend = None
            ocean_backend = connector[1](perceval_backend)
            elastic_ocean = get_elastic(es_con, es_index, clean, ocean_backend)
            ocean_backend.set_elastic(elastic_ocean)

            inserted = self.__data2es(items, ocean_backend)
            self.assertGreater(len(items), inserted)
Example #28
    def _get_enrich_backend(self):
        db_projects_map = None
        json_projects_map = None
        clean = False
        connector = get_connector_from_name(self.get_backend(self.backend_section))

        if 'projects_file' in self.conf['projects']:
            json_projects_map = self.conf['projects']['projects_file']

        enrich_backend = connector[2](self.db_sh, db_projects_map, json_projects_map,
                                      self.db_user, self.db_password, self.db_host)
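        # Attach the enriched index in ElasticSearch to the enrich backend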
        elastic_enrich = get_elastic(self.conf['es_enrichment']['url'],
                                     self.conf[self.backend_section]['enriched_index'],
                                     clean, enrich_backend)
        enrich_backend.set_elastic(elastic_enrich)

        if self.db_unaffiliate_group:
            enrich_backend.unaffiliated_group = self.db_unaffiliate_group

        return enrich_backend
Example #29
    def get_enrich_backend(self):
        db_projects_map = None
        json_projects_map = None
        clean = False
        connector = get_connector_from_name(self.backend_name)

        enrich_backend = connector[2](self.db_sh, db_projects_map, json_projects_map,
                                      self.db_user, self.db_password, self.db_host)
        elastic_enrich = get_elastic(self.conf['es_enrichment'],
                                     self.conf[self.backend_name]['enriched_index'],
                                     clean, enrich_backend)
        enrich_backend.set_elastic(elastic_enrich)

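        # Reuse the GitHub backend token from the configuration when enriching git data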
        if 'github' in self.conf.keys() and \
            'backend_token' in self.conf['github'].keys() and \
            self.backend_name == "git":

            gh_token = self.conf['github']['backend_token']
            enrich_backend.set_github_token(gh_token)

        return enrich_backend
Example #30
def enrich_backends(url,
                    clean,
                    debug=False,
                    redis=None,
                    db_projects_map=None,
                    db_sortinghat=None):
    ''' Enrich all existing indexes '''

    logging.info("Enriching repositories")

    elastic = get_elastic(url, ConfOcean.get_index(), clean)
    ConfOcean.set_elastic(elastic)
    fetch_cache = False

    # note: current rq versions use 'is_async' ('async' became a reserved word in Python 3.7)
    q = Queue('update', connection=Redis(redis), is_async=async_)

    for repo in ConfOcean.get_repos():
        enrich_task = q.enqueue(enrich_backend, url, clean,
                                repo['backend_name'], repo['backend_params'],
                                repo['index'], repo['index_enrich'],
                                db_projects_map, db_sortinghat)
        logging.info("Queued job")
        logging.info(enrich_task)
Example #31
def retain_data(retention_time, es_url, index):
    elastic = get_elastic(es_url, index)
    elastic.delete_items(retention_time)
Example #32
    app_init = datetime.now()

    args = get_params()

    config_logging(args.debug)

    if args.index is None:
        # Extract identities from all indexes
        pass
    else:
        logging.info("Extracting identities from: %s" % (args.index))
        perceval_params = get_perceval_params(args.elastic_url, args.index)
        backend_name = perceval_params['backend']
        connector = get_connector_from_name(backend_name)
        perceval_backend_class = connector[0]
        ocean_backend_class = connector[1]
        perceval_backend = None  # Don't use perceval

        perceval_backend = perceval_backend_class(**perceval_params)

        obackend = ocean_backend_class(perceval_backend, incremental=False)
        obackend.set_elastic(get_elastic(args.elastic_url, args.index))

        identities = get_identities(obackend)

        # Add the identities to Sorting Hat
        SortingHat.add_identities(identities, backend_name)

        print("Total identities processed: %i" % (len(identities)))