def _test_raw_to_enrich(self, sortinghat=False, projects=False):
    """Test whether raw indexes are properly enriched.

    Populates the raw (ocean) index with ``self.items``, builds the matching
    enrich backend, optionally loads SortingHat identities, and enriches.

    :param sortinghat: if True, create the enricher with a SortingHat database
    :param projects: if True, create the enricher with a projects JSON map
    :returns: dict with 'raw' (items in the raw index) and 'enrich'
        (items enriched) counts
    """
    # populate raw index
    perceval_backend = None
    clean = True
    ocean_backend = self.connectors[self.connector][1](perceval_backend)
    elastic_ocean = get_elastic(self.es_con, self.ocean_index, clean, ocean_backend)
    ocean_backend.set_elastic(elastic_ocean)
    data2es(self.items, ocean_backend)

    # populate enriched index
    # Build the constructor kwargs incrementally so every flag combination
    # is handled; the previous if/elif chain left `enrich_backend` unbound
    # (UnboundLocalError) when both sortinghat and projects were True.
    enrich_kwargs = {}
    if sortinghat:
        enrich_kwargs['db_sortinghat'] = DB_SORTINGHAT
    if projects:
        enrich_kwargs['json_projects_map'] = FILE_PROJECTS
    if sortinghat or projects:
        enrich_kwargs['db_user'] = self.db_user
        enrich_kwargs['db_password'] = self.db_password
    enrich_backend = self.connectors[self.connector][2](**enrich_kwargs)

    elastic_enrich = get_elastic(self.es_con, self.enrich_index, clean, enrich_backend)
    enrich_backend.set_elastic(elastic_enrich)

    # Load SH identities
    if sortinghat:
        load_identities(ocean_backend, enrich_backend)

    raw_count = len([item for item in ocean_backend.fetch()])
    enrich_count = enrich_backend.enrich_items(ocean_backend)

    # self._test_csv_mappings(sortinghat)

    return {'raw': raw_count, 'enrich': enrich_count}
def execute(self):
    """Gather identities from the backend's raw data and load them
    into SortingHat.

    Skips the load when no backend section is configured, when the
    backend has collection disabled, or when ``self.load_ids`` is False.
    """
    # FIXME this should be called just once
    # Init returns 0 on success; the result was previously bound to an
    # unused local (`code`), so the assignment has been dropped.
    Init(**self.sh_kwargs).run(self.db_sh, '--reuse')

    if not self.backend_section:
        logger.error(
            "Backend not configured in TaskIdentitiesCollection %s",
            self.backend_section)
        return

    backend_conf = self.config.get_conf()[self.backend_section]

    if 'collect' in backend_conf and not backend_conf['collect']:
        logger.info("Don't load ids from a backend without collection %s",
                    self.backend_section)
        return

    if self.load_ids:
        logger.info("[%s] Gathering identities from raw data",
                    self.backend_section)
        enrich_backend = self._get_enrich_backend()
        ocean_backend = self._get_ocean_backend(enrich_backend)
        load_identities(ocean_backend, enrich_backend)
def _test_refresh_identities(self):
    """Test refresh identities"""
    # Feed the raw (ocean) index with the test items.
    clean = True
    raw_backend = self.connectors[self.connector][1](None)
    elastic_raw = get_elastic(self.es_con, self.ocean_index, clean, raw_backend)
    raw_backend.set_elastic(elastic_raw)
    data2es(self.items, raw_backend)

    # Gather identities with a plain enricher first.
    plain_enricher = self.connectors[self.connector][2]()
    load_identities(raw_backend, plain_enricher)

    # Then enrich with a SortingHat-aware enricher and refresh.
    sh_enricher = self.connectors[self.connector][2](
        db_sortinghat=DB_SORTINGHAT,
        db_user=self.db_user,
        db_password=self.db_password)
    elastic_enrich = get_elastic(self.es_con, self.enrich_index, clean, sh_enricher)
    sh_enricher.set_elastic(elastic_enrich)
    sh_enricher.enrich_items(raw_backend)

    return refresh_identities(sh_enricher)
def _test_raw_to_enrich(self, sortinghat=False, projects=False, pair_programming=False):
    """Test whether raw indexes are properly enriched.

    Populates the raw (ocean) index with ``self.items``, builds the matching
    enrich backend (stored on ``self``), mocks its category lookups,
    optionally loads SortingHat identities, and enriches.

    :param sortinghat: if True, create the enricher with a SortingHat database
    :param projects: if True, create the enricher with a projects JSON map
    :param pair_programming: if True, enable pair programming on the enricher
    :returns: dict with 'raw' (items in the raw index) and 'enrich'
        (items enriched) counts
    """
    # populate raw index
    perceval_backend = None
    clean = True
    self.ocean_backend = self.connectors[self.connector][1](perceval_backend)
    elastic_ocean = get_elastic(self.es_con, self.ocean_index, clean, self.ocean_backend)
    self.ocean_backend.set_elastic(elastic_ocean)
    data2es(self.items, self.ocean_backend)

    # populate enriched index
    # Build the constructor kwargs incrementally so every flag combination
    # is handled; the previous if/elif chain left `self.enrich_backend`
    # unset (AttributeError/UnboundLocalError) when both sortinghat and
    # projects were True.
    enrich_kwargs = {}
    if sortinghat:
        enrich_kwargs['db_sortinghat'] = DB_SORTINGHAT
    if projects:
        enrich_kwargs['json_projects_map'] = FILE_PROJECTS
    if sortinghat or projects:
        enrich_kwargs['db_user'] = self.db_user
        enrich_kwargs['db_password'] = self.db_password
    self.enrich_backend = self.connectors[self.connector][2](**enrich_kwargs)

    if pair_programming:
        self.enrich_backend.pair_programming = pair_programming

    elastic_enrich = get_elastic(self.es_con, self.enrich_index, clean,
                                 self.enrich_backend, self.enrich_aliases)
    self.enrich_backend.set_elastic(elastic_enrich)

    # Stub out the category lookups so enrichment does not hit the service.
    categories = {1: 'General', 6: 'Technical', 2: 'Ecosystem', 3: 'Staff'}
    self.enrich_backend.categories = MagicMock(return_value=categories)
    categories_tree = {1: {}, 6: {}, 2: {}, 3: {}}
    self.enrich_backend.categories_tree = MagicMock(return_value=categories_tree)

    # Load SH identities
    if sortinghat:
        load_identities(self.ocean_backend, self.enrich_backend)

    raw_count = len([item for item in self.ocean_backend.fetch()])
    enrich_count = self.enrich_backend.enrich_items(self.ocean_backend)

    # self._test_csv_mappings(sortinghat)

    return {'raw': raw_count, 'enrich': enrich_count}
def execute(self):
    """Gather identities from the backend's raw data and load them
    into SortingHat.

    Skips the load when no backend section is configured, when the
    backend has collection disabled, or when ``self.load_ids`` is False.
    """
    # FIXME this should be called just once
    # Init returns 0 on success; the result was previously bound to an
    # unused local (`code`), so the assignment has been dropped.
    Init(**self.sh_kwargs).run(self.db_sh, '--reuse')

    if not self.backend_section:
        logger.error("Backend not configured in TaskIdentitiesCollection %s",
                     self.backend_section)
        return

    backend_conf = self.config.get_conf()[self.backend_section]

    if 'collect' in backend_conf and not backend_conf['collect']:
        logger.info("Don't load ids from a backend without collection %s",
                    self.backend_section)
        return

    if self.load_ids:
        logger.info("[%s] Gathering identities from raw data",
                    self.backend_section)
        enrich_backend = self._get_enrich_backend()
        ocean_backend = self._get_ocean_backend(enrich_backend)
        load_identities(ocean_backend, enrich_backend)
def enrich_git_items(es, index_git_raw, commits_sha_list, project, db_config):
    """Enrich the git commits identified by their SHAs.

    Fetches the raw commits, writes a temporary projects file for the
    enricher, loads identities, and produces one rich item per commit.

    :param es: ElasticSearch connection used to read the raw index
    :param index_git_raw: name of the raw git index
    :param commits_sha_list: SHAs of the commits to enrich
    :param project: project the commits belong to (written to the
        temporary projects file)
    :param db_config: dict with 'database', 'user', 'password' and 'host'
        keys for the SortingHat database
    :returns: list of enriched commit items
    """
    commits = _get_git_commits(es, index_git_raw, commits_sha_list)
    projects_file_path = _create_projects_file(project, "git", commits)
    logger.info("Total git track items to be enriched: %i", len(commits))

    enricher = GitEnrich(db_sortinghat=db_config['database'],
                         db_user=db_config['user'],
                         db_password=db_config['password'],
                         db_host=db_config['host'],
                         json_projects_map=projects_file_path)

    try:
        # First load identities
        load_identities(commits, enricher)

        # Then enrich
        enriched_items = [enricher.get_rich_item(commit) for commit in commits]
    finally:
        # Remove the temporary projects file even if enrichment fails;
        # previously an exception above leaked the file on disk.
        os.unlink(projects_file_path)

    return enriched_items
def test_refresh_identities(self):
    """Test refresh identities"""
    # Feed the raw (ocean) index with the test items.
    discourse = Discourse('https://example.com', api_token='1234',
                          api_username='******')
    self.ocean_backend = self.connectors[self.connector][1](discourse)
    elastic_raw = get_elastic(self.es_con, self.ocean_index, True,
                              self.ocean_backend)
    self.ocean_backend.set_elastic(elastic_raw)
    data2es(self.items, self.ocean_backend)

    # Gather identities with a plain enricher first.
    self.enrich_backend = self.connectors[self.connector][2]()
    load_identities(self.ocean_backend, self.enrich_backend)

    # Then enrich with a SortingHat-aware enricher and refresh.
    self.enrich_backend = self.connectors[self.connector][2](
        db_sortinghat=DB_SORTINGHAT,
        db_user=self.db_user,
        db_password=self.db_password)
    elastic_enrich = get_elastic(self.es_con, self.enrich_index, True,
                                 self.enrich_backend)
    self.enrich_backend.set_elastic(elastic_enrich)
    self.enrich_backend.enrich_items(self.ocean_backend)

    total = refresh_identities(self.enrich_backend)
def enrich_gerrit_items(es, index_gerrit_raw, gerrit_numbers, project, db_config):
    """Enrich the gerrit reviews identified by their numbers.

    Fetches the raw reviews, writes a temporary projects file for the
    enricher, loads identities, and produces one rich item per review.

    :param es: ElasticSearch connection used to read the raw index
    :param index_gerrit_raw: name of the raw gerrit index
    :param gerrit_numbers: numbers of the reviews to enrich
    :param project: project the reviews belong to (written to the
        temporary projects file)
    :param db_config: dict with 'database', 'user', 'password' and 'host'
        keys for the SortingHat database
    :returns: list of enriched review items
    """
    reviews = _get_gerrit_reviews(es, index_gerrit_raw, gerrit_numbers)
    projects_file_path = _create_projects_file(project, "gerrit", reviews)
    logger.info("Total gerrit track items to be enriched: %i", len(reviews))

    enricher = GerritEnrich(db_sortinghat=db_config['database'],
                            db_user=db_config['user'],
                            db_password=db_config['password'],
                            db_host=db_config['host'],
                            json_projects_map=projects_file_path)

    try:
        # First load identities
        load_identities(reviews, enricher)

        # Then enrich
        enriched_items = [enricher.get_rich_item(review) for review in reviews]
    finally:
        # Remove the temporary projects file even if enrichment fails;
        # previously an exception above leaked the file on disk.
        os.unlink(projects_file_path)

    return enriched_items