def _test_refresh_identities(self):
    """Test refresh identities"""

    # populate raw index
    perceval_backend = None
    clean = True
    ocean_backend = self.connectors[self.connector][1](perceval_backend)
    elastic_ocean = get_elastic(self.es_con, self.ocean_index, clean, ocean_backend)
    ocean_backend.set_elastic(elastic_ocean)
    data2es(self.items, ocean_backend)

    # populate enriched index
    enrich_backend = self.connectors[self.connector][2]()
    load_identities(ocean_backend, enrich_backend)
    enrich_backend = self.connectors[self.connector][2](db_sortinghat=DB_SORTINGHAT,
                                                        db_user=self.db_user,
                                                        db_password=self.db_password)
    elastic_enrich = get_elastic(self.es_con, self.enrich_index, clean, enrich_backend)
    enrich_backend.set_elastic(elastic_enrich)
    enrich_backend.enrich_items(ocean_backend)

    total = refresh_identities(enrich_backend)
    return total

def _test_raw_to_enrich(self, sortinghat=False, projects=False):
    """Test whether raw indexes are properly enriched"""

    # populate raw index
    perceval_backend = None
    clean = True
    ocean_backend = self.connectors[self.connector][1](perceval_backend)
    elastic_ocean = get_elastic(self.es_con, self.ocean_index, clean, ocean_backend)
    ocean_backend.set_elastic(elastic_ocean)
    data2es(self.items, ocean_backend)

    # populate enriched index
    if not sortinghat and not projects:
        enrich_backend = self.connectors[self.connector][2]()
    elif sortinghat and not projects:
        enrich_backend = self.connectors[self.connector][2](db_sortinghat=DB_SORTINGHAT,
                                                            db_user=self.db_user,
                                                            db_password=self.db_password)
    elif not sortinghat and projects:
        enrich_backend = self.connectors[self.connector][2](json_projects_map=FILE_PROJECTS,
                                                            db_user=self.db_user,
                                                            db_password=self.db_password)

    elastic_enrich = get_elastic(self.es_con, self.enrich_index, clean, enrich_backend)
    enrich_backend.set_elastic(elastic_enrich)

    # Load SH identities
    if sortinghat:
        load_identities(ocean_backend, enrich_backend)

    raw_count = len([item for item in ocean_backend.fetch()])
    enrich_count = enrich_backend.enrich_items(ocean_backend)

    # self._test_csv_mappings(sortinghat)

    return {'raw': raw_count, 'enrich': enrich_count}

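# Illustrative only: a connector test class would typically drive the helper
# above as sketched below; `TestGit` and its fixtures are hypothetical
# stand-ins, not part of this module.
#
# class TestGit(TestBaseBackend):
#     connector = "git"
#
#     def test_raw_to_enrich(self):
#         result = self._test_raw_to_enrich(sortinghat=True)
#         self.assertEqual(result['raw'], result['enrich'])
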
def _test_raw_to_enrich_anonymized(self, sortinghat=False, projects=False):
    """Test whether raw indexes are properly enriched"""

    # populate raw index
    perceval_backend = None
    clean = True
    self.ocean_backend = self.connectors[self.connector][1](perceval_backend, anonymize=True)
    elastic_ocean = get_elastic(self.es_con, self.ocean_index_anonymized, clean, self.ocean_backend)
    self.ocean_backend.set_elastic(elastic_ocean)
    data2es(self.items, self.ocean_backend)

    # populate enriched index
    self.enrich_backend = self.connectors[self.connector][2]()
    elastic_enrich = get_elastic(self.es_con, self.enrich_index_anonymized, clean,
                                 self.enrich_backend, self.enrich_aliases)
    self.enrich_backend.set_elastic(elastic_enrich)

    raw_count = len([item for item in self.ocean_backend.fetch()])
    enrich_count = self.enrich_backend.enrich_items(self.ocean_backend)

    return {'raw': raw_count, 'enrich': enrich_count}

def test_enrich(self, sortinghat=False, projects=False):
    """Test enrich all sources"""

    config = configparser.ConfigParser()
    config.read(CONFIG_FILE)
    es_con = dict(config.items('ElasticSearch'))['url']

    logging.info("Enriching data in: %s", es_con)

    connectors = get_connectors()

    for con in sorted(connectors.keys()):
        perceval_backend = None
        ocean_index = "test_" + con
        enrich_index = "test_" + con + "_enrich"
        clean = False
        ocean_backend = connectors[con][1](perceval_backend)
        elastic_ocean = get_elastic(es_con, ocean_index, clean, ocean_backend)
        ocean_backend.set_elastic(elastic_ocean)
        clean = True
        if not sortinghat and not projects:
            enrich_backend = connectors[con][2]()
        elif sortinghat and not projects:
            enrich_backend = connectors[con][2](db_sortinghat=DB_SORTINGHAT)
        elif not sortinghat and projects:
            enrich_backend = connectors[con][2](db_projects_map=DB_PROJECTS)
        elastic_enrich = get_elastic(es_con, enrich_index, clean, enrich_backend)
        enrich_backend.set_elastic(elastic_enrich)
        if sortinghat:
            # Load SH identities
            load_identities(ocean_backend, enrich_backend)
        enrich_count = enrich_backend.enrich_items(ocean_backend)
        if enrich_count is not None:
            logging.info("Total items enriched %i", enrich_count)

def _test_refresh_project(self):
    """Test refresh project field"""

    # populate raw index
    perceval_backend = None
    clean = True
    ocean_backend = self.connectors[self.connector][1](perceval_backend)
    elastic_ocean = get_elastic(self.es_con, self.ocean_index, clean, ocean_backend)
    ocean_backend.set_elastic(elastic_ocean)
    data2es(self.items, ocean_backend)

    # populate enriched index
    enrich_backend = self.connectors[self.connector][2](db_projects_map=DB_PROJECTS,
                                                        db_user=self.db_user,
                                                        db_password=self.db_password)
    elastic_enrich = get_elastic(self.es_con, self.enrich_index, clean, enrich_backend)
    enrich_backend.set_elastic(elastic_enrich)
    enrich_backend.enrich_items(ocean_backend)

    total = refresh_projects(enrich_backend)
    return total

def _test_study(self, test_study):
    """Test the execution of a study"""

    # populate raw index
    perceval_backend = None
    clean = True
    ocean_backend = self.connectors[self.connector][1](perceval_backend)
    elastic_ocean = get_elastic(self.es_con, self.ocean_index, clean, ocean_backend)
    ocean_backend.set_elastic(elastic_ocean)
    data2es(self.items, ocean_backend)

    # populate enriched index
    enrich_backend = self.connectors[self.connector][2](db_sortinghat=DB_SORTINGHAT,
                                                        db_user=self.db_user,
                                                        db_password=self.db_password)
    elastic_enrich = get_elastic(self.es_con, self.enrich_index, clean, enrich_backend)
    enrich_backend.set_elastic(elastic_enrich)
    enrich_backend.enrich_items(ocean_backend)

    for study in enrich_backend.studies:
        if test_study == study.__name__:
            found = (study, ocean_backend, enrich_backend)
            return found

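# Illustrative only: callers unpack the tuple returned by _test_study and run
# the study; any extra kwargs depend on the concrete study being exercised.
#
# study, ocean_backend, enrich_backend = self._test_study('enrich_demography')
# study(ocean_backend, enrich_backend)
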
def _get_enrich_backend(self):
    db_projects_map = None
    json_projects_map = None
    clean = False

    connector = get_connector_from_name(self.get_backend(self.backend_section))

    if 'projects_file' in self.conf['projects']:
        json_projects_map = self.conf['projects']['projects_file']

    enrich_backend = connector[2](self.db_sh, db_projects_map, json_projects_map,
                                  self.db_user, self.db_password, self.db_host)
    elastic_enrich = get_elastic(self.conf['es_enrichment']['url'],
                                 self.conf[self.backend_section]['enriched_index'],
                                 clean, enrich_backend)
    enrich_backend.set_elastic(elastic_enrich)

    if 'github' in self.conf.keys() and \
            'backend_token' in self.conf['github'].keys() and \
            self.get_backend(self.backend_section) == "git":
        gh_token = self.conf['github']['backend_token']
        enrich_backend.set_github_token(gh_token)

    if 'unaffiliated_group' in self.conf['sortinghat']:
        enrich_backend.unaffiliated_group = self.conf['sortinghat']['unaffiliated_group']

    return enrich_backend

def test_add_alias(self):
    """Test whether add_alias properly works"""

    config = configparser.ConfigParser()
    config.read(CONFIG_FILE)
    es_con = dict(config.items('ElasticSearch'))['url']

    tmp_index = "test-add-aliases"
    tmp_index_url = es_con + "/" + tmp_index

    enrich_backend = get_connectors()["git"][2]()
    elastic_enrich = get_elastic(es_con, tmp_index, True, enrich_backend)
    self._enrich.set_elastic(elastic_enrich)

    # add alias
    with self.assertLogs(logger, level='INFO') as cm:
        self._enrich.elastic.add_alias(DEMOGRAPHICS_ALIAS)

    self.assertEqual(cm.output[0],
                     'INFO:grimoire_elk.elastic:Alias %s created on %s.'
                     % (DEMOGRAPHICS_ALIAS, anonymize_url(tmp_index_url)))

    r = self._enrich.requests.get(self._enrich.elastic.index_url + "/_alias",
                                  headers=HEADER_JSON, verify=False)
    self.assertIn(DEMOGRAPHICS_ALIAS, r.json()[self._enrich.elastic.index]['aliases'])

    # add alias again
    with self.assertLogs(logger, level='DEBUG') as cm:
        self._enrich.elastic.add_alias(DEMOGRAPHICS_ALIAS)

    self.assertEqual(cm.output[0],
                     'DEBUG:grimoire_elk.elastic:Alias %s already exists on %s.'
                     % (DEMOGRAPHICS_ALIAS, anonymize_url(tmp_index_url)))

    requests.delete(tmp_index_url, verify=False)

def _get_ocean_backend(self, enrich_backend):
    backend_cmd = None
    no_incremental = False
    clean = False

    from .task_projects import TaskProjects
    repos = TaskProjects.get_repos_by_backend_section(self.backend_section)
    if len(repos) == 1:
        # Support for filter raw when we have one repo
        (filter_raw, filters_raw_prefix) = self.__filters_raw(repos[0])
        if filter_raw or filters_raw_prefix:
            logger.info("Using %s %s for getting identities from raw",
                        filter_raw, filters_raw_prefix)
        ocean_backend = get_ocean_backend(backend_cmd, enrich_backend, no_incremental,
                                          filter_raw, filters_raw_prefix)
    else:
        ocean_backend = get_ocean_backend(backend_cmd, enrich_backend, no_incremental)

    elastic_ocean = get_elastic(self._get_collection_url(),
                                self.conf[self.backend_section]['raw_index'],
                                clean, ocean_backend)
    ocean_backend.set_elastic(elastic_ocean)

    return ocean_backend

def _get_ocean_backend(self, enrich_backend):
    backend_cmd = None
    no_incremental = False
    clean = False

    from .task_projects import TaskProjects
    repos = TaskProjects.get_repos_by_backend_section(self.backend_section)
    if len(repos) == 1:
        # Support for filter raw when we have one repo
        repo = repos[0]
        repo, repo_labels = self._extract_repo_labels(self.backend_section, repo)
        p2o_args = self._compose_p2o_params(self.backend_section, repo)
        filter_raw = p2o_args['filter-raw'] if 'filter-raw' in p2o_args else None
        ocean_backend = get_ocean_backend(backend_cmd, enrich_backend,
                                          no_incremental, filter_raw)
    else:
        ocean_backend = get_ocean_backend(backend_cmd, enrich_backend, no_incremental)

    elastic_ocean = get_elastic(self._get_collection_url(),
                                self.conf[self.backend_section]['raw_index'],
                                clean, ocean_backend)
    ocean_backend.set_elastic(elastic_ocean)

    return ocean_backend

def __autorefresh_studies(self, cfg):
    """Execute autorefresh for areas of code study if configured"""

    if 'studies' not in self.conf[self.backend_section] or \
            'enrich_areas_of_code:git' not in self.conf[self.backend_section]['studies']:
        logger.debug("Not doing autorefresh for studies, Areas of Code study is not active.")
        return

    aoc_index = self.conf['enrich_areas_of_code:git'].get('out_index', GitEnrich.GIT_AOC_ENRICHED)

    # if `out_index` exists but has no value, use default
    if not aoc_index:
        aoc_index = GitEnrich.GIT_AOC_ENRICHED

    logger.debug("Autorefresh for Areas of Code study index: %s", aoc_index)

    es = Elasticsearch([self.conf['es_enrichment']['url']], timeout=100,
                       verify_certs=self._get_enrich_backend().elastic.requests.verify)

    if not es.indices.exists(index=aoc_index):
        logger.debug("Not doing autorefresh, index doesn't exist for Areas of Code study")
        return

    logger.debug("Doing autorefresh for Areas of Code study")

    # Create a GitEnrich backend tweaked to work with AOC index
    aoc_backend = GitEnrich(self.db_sh, None,
                            cfg['projects']['projects_file'],
                            self.db_user, self.db_password, self.db_host)
    aoc_backend.mapping = None
    aoc_backend.roles = ['author']
    elastic_enrich = get_elastic(self.conf['es_enrichment']['url'],
                                 aoc_index, clean=False, backend=aoc_backend)
    aoc_backend.set_elastic(elastic_enrich)

    self.__autorefresh(aoc_backend, studies=True)

def test_refresh_project(self):
    """Test refresh project field for all sources"""

    # self.test_enrich_sh()  # Load the identities in ES

    config = configparser.ConfigParser()
    config.read(CONFIG_FILE)
    es_con = dict(config.items('ElasticSearch'))['url']

    db_user = ''
    db_password = ''
    if 'Database' in config:
        if 'user' in config['Database']:
            db_user = config['Database']['user']
        if 'password' in config['Database']:
            db_password = config['Database']['password']

    logging.info("Refreshing data in: %s", es_con)

    connectors = get_connectors()

    for con in sorted(connectors.keys()):
        enrich_index = "test_" + con + "_enrich"
        enrich_backend = connectors[con][2](db_projects_map=DB_PROJECTS,
                                            db_user=db_user,
                                            db_password=db_password)
        clean = False
        elastic_enrich = get_elastic(es_con, enrich_index, clean, enrich_backend)
        enrich_backend.set_elastic(elastic_enrich)
        logging.info("Refreshing projects fields in enriched index %s",
                     elastic_enrich.index_url)
        self.__refresh_projects(enrich_backend)

def test_enrich(self, sortinghat=False, projects=False):
    """Test enrich all sources"""

    config = configparser.ConfigParser()
    config.read(CONFIG_FILE)
    es_con = dict(config.items('ElasticSearch'))['url']

    db_user = ''
    db_password = ''
    if 'Database' in config:
        if 'user' in config['Database']:
            db_user = config['Database']['user']
        if 'password' in config['Database']:
            db_password = config['Database']['password']

    logging.info("Enriching data in: %s", es_con)

    connectors = get_connectors()

    for con in sorted(connectors.keys()):
        perceval_backend = None
        ocean_index = "test_" + con
        enrich_index = "test_" + con + "_enrich"
        clean = False
        ocean_backend = connectors[con][1](perceval_backend)
        elastic_ocean = get_elastic(es_con, ocean_index, clean, ocean_backend)
        ocean_backend.set_elastic(elastic_ocean)
        clean = True
        if not sortinghat and not projects:
            enrich_backend = connectors[con][2]()
        elif sortinghat and not projects:
            enrich_backend = connectors[con][2](db_sortinghat=DB_SORTINGHAT,
                                                db_user=db_user,
                                                db_password=db_password)
        elif not sortinghat and projects:
            enrich_backend = connectors[con][2](db_projects_map=DB_PROJECTS,
                                                db_user=db_user,
                                                db_password=db_password)
        elastic_enrich = get_elastic(es_con, enrich_index, clean, enrich_backend)
        enrich_backend.set_elastic(elastic_enrich)
        if sortinghat:
            # Load SH identities
            load_identities(ocean_backend, enrich_backend)
        raw_count = len([item for item in ocean_backend.fetch()])
        enrich_count = enrich_backend.enrich_items(ocean_backend)
        self.assertEqual(raw_count, enrich_count)

def _test_study(self, test_study, projects_json_repo=None, projects_json=None, prjs_map=None):
    """Test the execution of a study"""

    # populate raw index
    perceval_backend = None
    clean = True
    self.ocean_backend = self.connectors[self.connector][1](perceval_backend)
    elastic_ocean = get_elastic(self.es_con, self.ocean_index, clean, self.ocean_backend)
    self.ocean_backend.set_elastic(elastic_ocean)
    data2es(self.items, self.ocean_backend)

    # populate enriched index
    self.enrich_backend = self.connectors[self.connector][2](db_sortinghat=DB_SORTINGHAT,
                                                             db_user=self.db_user,
                                                             db_password=self.db_password,
                                                             db_projects_map=self.db_projects_map)
    elastic_enrich = get_elastic(self.es_con, self.enrich_index, clean, self.enrich_backend)
    self.enrich_backend.set_elastic(elastic_enrich)

    if projects_json:
        self.enrich_backend.json_projects = projects_json

    if projects_json_repo:
        self.enrich_backend.projects_json_repo = projects_json_repo

    if prjs_map:
        self.enrich_backend.prjs_map = prjs_map

    self.enrich_backend.enrich_items(self.ocean_backend)

    # initialize to None so the name is bound even when no study matches
    found = None
    for study in self.enrich_backend.studies:
        if test_study == study.__name__:
            found = (study, self.ocean_backend, self.enrich_backend)
            break

    return found

def get_perceval_params(url, index):
    logging.info("Get perceval params for index: %s", index)

    elastic = get_elastic(url, ConfOcean.get_index())
    ConfOcean.set_elastic(elastic)

    r = requests.get(elastic.index_url + "/repos/" + index)
    params = r.json()['_source']['params']

    return params

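# Example (hypothetical URL and index name): look up the Perceval parameters
# stored for a raw index registered in ConfOcean.
#
# params = get_perceval_params("http://localhost:9200", "git_raw")
# backend_name = params['backend']
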
def _test_raw_to_enrich(self, sortinghat=False, projects=False, pair_programming=False):
    """Test whether raw indexes are properly enriched"""

    # populate raw index
    perceval_backend = None
    clean = True
    self.ocean_backend = self.connectors[self.connector][1](perceval_backend)
    elastic_ocean = get_elastic(self.es_con, self.ocean_index, clean, self.ocean_backend)
    self.ocean_backend.set_elastic(elastic_ocean)
    data2es(self.items, self.ocean_backend)

    # populate enriched index
    if not sortinghat and not projects:
        self.enrich_backend = self.connectors[self.connector][2]()
    elif sortinghat and not projects:
        self.enrich_backend = self.connectors[self.connector][2](db_sortinghat=DB_SORTINGHAT,
                                                                 db_user=self.db_user,
                                                                 db_password=self.db_password)
    elif not sortinghat and projects:
        self.enrich_backend = self.connectors[self.connector][2](json_projects_map=FILE_PROJECTS,
                                                                 db_user=self.db_user,
                                                                 db_password=self.db_password)

    if pair_programming:
        self.enrich_backend.pair_programming = pair_programming

    elastic_enrich = get_elastic(self.es_con, self.enrich_index, clean,
                                 self.enrich_backend, self.enrich_aliases)
    self.enrich_backend.set_elastic(elastic_enrich)

    categories = {1: 'General', 6: 'Technical', 2: 'Ecosystem', 3: 'Staff'}
    self.enrich_backend.categories = MagicMock(return_value=categories)
    categories_tree = {1: {}, 6: {}, 2: {}, 3: {}}
    self.enrich_backend.categories_tree = MagicMock(return_value=categories_tree)

    # Load SH identities
    if sortinghat:
        load_identities(self.ocean_backend, self.enrich_backend)

    raw_count = len([item for item in self.ocean_backend.fetch()])
    enrich_count = self.enrich_backend.enrich_items(self.ocean_backend)

    # self._test_csv_mappings(sortinghat)

    return {'raw': raw_count, 'enrich': enrich_count}

def _test_items_to_raw_anonymized(self):
    """Test whether anonymized fetched items are properly loaded to ES"""

    clean = True
    perceval_backend = None
    self.ocean_backend = self.connectors[self.connector][1](perceval_backend, anonymize=True)
    elastic_ocean = get_elastic(self.es_con, self.ocean_index_anonymized, clean,
                                self.ocean_backend, self.ocean_aliases)
    self.ocean_backend.set_elastic(elastic_ocean)

    raw_items = data2es(self.items, self.ocean_backend)

    return {'items': len(self.items), 'raw': raw_items}

def test_refresh_project(self):
    """Test refresh project field for all sources"""

    # populate raw index
    perceval_backend = Discourse('https://example.com', api_token='1234', api_username='******')
    clean = True
    self.ocean_backend = self.connectors[self.connector][1](perceval_backend)
    elastic_ocean = get_elastic(self.es_con, self.ocean_index, clean, self.ocean_backend)
    self.ocean_backend.set_elastic(elastic_ocean)
    data2es(self.items, self.ocean_backend)

    # populate enriched index
    self.enrich_backend = self.connectors[self.connector][2](db_projects_map=DB_PROJECTS,
                                                             db_user=self.db_user,
                                                             db_password=self.db_password)
    elastic_enrich = get_elastic(self.es_con, self.enrich_index, clean, self.enrich_backend)
    self.enrich_backend.set_elastic(elastic_enrich)
    self.enrich_backend.enrich_items(self.ocean_backend)

    total = refresh_projects(self.enrich_backend)

def _test_items_to_raw(self):
    """Test whether fetched items are properly loaded to ES"""

    clean = True
    perceval_backend = None
    ocean_backend = self.connectors[self.connector][1](perceval_backend)
    elastic_ocean = get_elastic(self.es_con, self.ocean_index, clean, ocean_backend)
    ocean_backend.set_elastic(elastic_ocean)

    raw_items = data2es(self.items, ocean_backend)

    return {'items': len(self.items), 'raw': raw_items}

def collect_arthur_items(repo):
    aitems = self.__feed_backend_arthur(repo)
    if not aitems:
        return

    connector = get_connector_from_name(self.backend_section)
    klass = connector[1]  # Ocean backend for the connector
    ocean_backend = klass(None)

    es_col_url = self._get_collection_url()
    es_index = self.conf[self.backend_section]['raw_index']
    clean = False
    elastic_ocean = get_elastic(es_col_url, es_index, clean, ocean_backend)
    ocean_backend.set_elastic(elastic_ocean)
    ocean_backend.feed(arthur_items=aitems)

def test_refresh_identities(self):
    """Test refresh identities"""

    # populate raw index
    perceval_backend = Discourse('https://example.com', api_token='1234', api_username='******')
    clean = True
    self.ocean_backend = self.connectors[self.connector][1](perceval_backend)
    elastic_ocean = get_elastic(self.es_con, self.ocean_index, clean, self.ocean_backend)
    self.ocean_backend.set_elastic(elastic_ocean)
    data2es(self.items, self.ocean_backend)

    # populate enriched index
    self.enrich_backend = self.connectors[self.connector][2]()
    load_identities(self.ocean_backend, self.enrich_backend)
    self.enrich_backend = self.connectors[self.connector][2](db_sortinghat=DB_SORTINGHAT,
                                                             db_user=self.db_user,
                                                             db_password=self.db_password)
    elastic_enrich = get_elastic(self.es_con, self.enrich_index, clean, self.enrich_backend)
    self.enrich_backend.set_elastic(elastic_enrich)
    self.enrich_backend.enrich_items(self.ocean_backend)

    total = refresh_identities(self.enrich_backend)

def _get_ocean_backend(self, enrich_backend):
    backend_cmd = None
    no_incremental = False
    clean = False

    ocean_backend = get_ocean_backend(backend_cmd, enrich_backend, no_incremental)
    elastic_ocean = get_elastic(self._get_collection_url(),
                                self.conf[self.backend_name]['raw_index'],
                                clean, ocean_backend)
    ocean_backend.set_elastic(elastic_ocean)

    return ocean_backend

def feed_backends(url, clean, debug=False, redis=None):
    ''' Update Ocean for all existing backends '''

    logging.info("Updating all Ocean")

    elastic = get_elastic(url, ConfOcean.get_index(), clean)
    ConfOcean.set_elastic(elastic)
    fetch_cache = False

    # `async` is a reserved word since Python 3.7; recent rq versions call
    # this parameter `is_async`
    q = Queue('update', connection=Redis(redis), is_async=async_)

    for repo in ConfOcean.get_repos():
        task_feed = q.enqueue(feed_backend, url, clean, fetch_cache,
                              repo['backend_name'], repo['backend_params'],
                              repo['index'], repo['index_enrich'], repo['project'])
        logging.info("Queued job")
        logging.info(task_feed)

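# Note: the jobs queued above are consumed by an rq worker listening on the
# same 'update' queue, started separately, e.g. from a shell: rq worker update
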
def test_data_load(self):
    """Test load all sources JSON data into ES"""

    config = configparser.ConfigParser()
    config.read(CONFIG_FILE)
    es_con = dict(config.items('ElasticSearch'))['url']

    logging.info("Loading data in: %s", es_con)

    connectors = get_connectors()

    for con in sorted(connectors.keys()):
        with open(os.path.join("data", con + ".json")) as f:
            items = json.load(f)
            es_index = "test_" + con
            clean = True
            perceval_backend = None
            ocean_backend = connectors[con][1](perceval_backend)
            elastic_ocean = get_elastic(es_con, es_index, clean, ocean_backend)
            ocean_backend.set_elastic(elastic_ocean)
            self.__data2es(items, ocean_backend)

def test_refresh_identities(self):
    """Test refresh identities for all sources"""

    # self.test_enrich_sh()  # Load the identities in ES

    config = configparser.ConfigParser()
    config.read(CONFIG_FILE)
    es_con = dict(config.items('ElasticSearch'))['url']

    logging.info("Refreshing data in: %s", es_con)

    connectors = get_connectors()

    for con in sorted(connectors.keys()):
        enrich_index = "test_" + con + "_enrich"
        enrich_backend = connectors[con][2](db_sortinghat=DB_SORTINGHAT)
        clean = False
        elastic_enrich = get_elastic(es_con, enrich_index, clean, enrich_backend)
        enrich_backend.set_elastic(elastic_enrich)
        logging.info("Refreshing identities fields in enriched index %s",
                     elastic_enrich.index_url)
        self.__refresh_identities(enrich_backend)

def test_data_load_error(self):
    """Test whether malformed items are not inserted into ES"""

    config = configparser.ConfigParser()
    config.read(CONFIG_FILE)
    es_con = dict(config.items('ElasticSearch'))['url']

    logging.info("Loading data in: %s", es_con)

    connector = get_connectors()['functest']

    with open(os.path.join("data", "functest_wrong.json")) as f:
        items = json.load(f)
        es_index = "test_functest"
        clean = True
        perceval_backend = None
        ocean_backend = connector[1](perceval_backend)
        elastic_ocean = get_elastic(es_con, es_index, clean, ocean_backend)
        ocean_backend.set_elastic(elastic_ocean)
        inserted = self.__data2es(items, ocean_backend)
        self.assertGreater(len(items), inserted)

def _get_enrich_backend(self):
    db_projects_map = None
    json_projects_map = None
    clean = False

    connector = get_connector_from_name(self.get_backend(self.backend_section))

    if 'projects_file' in self.conf['projects']:
        json_projects_map = self.conf['projects']['projects_file']

    enrich_backend = connector[2](self.db_sh, db_projects_map, json_projects_map,
                                  self.db_user, self.db_password, self.db_host)
    elastic_enrich = get_elastic(self.conf['es_enrichment']['url'],
                                 self.conf[self.backend_section]['enriched_index'],
                                 clean, enrich_backend)
    enrich_backend.set_elastic(elastic_enrich)

    if self.db_unaffiliate_group:
        enrich_backend.unaffiliated_group = self.db_unaffiliate_group

    return enrich_backend

def get_enrich_backend(self):
    db_projects_map = None
    json_projects_map = None
    clean = False

    connector = get_connector_from_name(self.backend_name)
    enrich_backend = connector[2](self.db_sh, db_projects_map, json_projects_map,
                                  self.db_user, self.db_password, self.db_host)
    elastic_enrich = get_elastic(self.conf['es_enrichment'],
                                 self.conf[self.backend_name]['enriched_index'],
                                 clean, enrich_backend)
    enrich_backend.set_elastic(elastic_enrich)

    if 'github' in self.conf.keys() and \
            'backend_token' in self.conf['github'].keys() and \
            self.backend_name == "git":
        gh_token = self.conf['github']['backend_token']
        enrich_backend.set_github_token(gh_token)

    return enrich_backend

def enrich_backends(url, clean, debug=False, redis=None,
                    db_projects_map=None, db_sortinghat=None):
    ''' Enrich all existing indexes '''

    logging.info("Enriching repositories")

    elastic = get_elastic(url, ConfOcean.get_index(), clean)
    ConfOcean.set_elastic(elastic)
    fetch_cache = False

    # `async` is a reserved word since Python 3.7; recent rq versions call
    # this parameter `is_async`
    q = Queue('update', connection=Redis(redis), is_async=async_)

    for repo in ConfOcean.get_repos():
        enrich_task = q.enqueue(enrich_backend, url, clean,
                                repo['backend_name'], repo['backend_params'],
                                repo['index'], repo['index_enrich'],
                                db_projects_map, db_sortinghat)
        logging.info("Queued job")
        logging.info(enrich_task)

def retain_data(retention_time, es_url, index):
    """Delete from `index` the items older than `retention_time` minutes"""

    elastic = get_elastic(es_url, index)
    elastic.delete_items(retention_time)

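# Example (hypothetical values): keep roughly the last 90 days of documents
# in a raw index; retention_time is expressed in minutes.
#
# retain_data(90 * 24 * 60, "http://localhost:9200", "git_raw")
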
app_init = datetime.now()

args = get_params()

config_logging(args.debug)

if args.index is None:
    # Extract identities from all indexes
    pass
else:
    logging.info("Extracting identities from: %s", args.index)
    perceval_params = get_perceval_params(args.elastic_url, args.index)
    backend_name = perceval_params['backend']
    connector = get_connector_from_name(backend_name)
    perceval_backend_class = connector[0]
    ocean_backend_class = connector[1]

    perceval_backend = None  # Don't use perceval
    perceval_backend = perceval_backend_class(**perceval_params)
    obackend = ocean_backend_class(perceval_backend, incremental=False)
    obackend.set_elastic(get_elastic(args.elastic_url, args.index))

    identities = get_identities(obackend)

    # Add the identities to Sorting Hat
    SortingHat.add_identities(identities, backend_name)

    print("Total identities processed: %i" % len(identities))