def main():
    """Entry point: create a template config file, or run SirMordred.

    Returns 0 on success and 1 on bad arguments or configuration errors.
    """
    args = parse_args()

    if args.config_template_file is not None:
        # -t: only dump a sample configuration file, then exit
        Config.create_config_file(args.config_template_file)
        Logger.info("Sample config file created in {}".format(
            args.config_template_file))
        return 0

    if args.config_files is None:
        Logger.error("Option -t or -c is required")
        return 1

    try:
        config = Config(args.config_files[0], args.config_files[1:])
        config_dict = config.get_conf()
        general = config_dict['general']
        logger = setup_logs(general['logs_dir'], general['debug'])
    except RuntimeError as error:
        print("Error while consuming configuration: {}".format(error))
        return 1

    if args.phases:
        logger.info("Executing sirmordred for phases: {}".format(args.phases))
        # HACK: the internal dict of Config is modified directly.
        # In manual phases execute sirmordred as an script
        config_dict['general']['update'] = False
        for phase in config_dict['phases']:
            config_dict['phases'][phase] = phase in args.phases

    SirMordred(config).start()
def test_run(self):
    """Test whether the Task could be run"""
    config = Config(CONF_FILE)
    cfg = config.get_conf()

    # Projects must be loaded before enrichment can run
    TaskProjects(config).execute()

    task = TaskEnrich(config, backend_section=GIT_BACKEND_SECTION)
    self.assertEqual(task.execute(), None)

    # Check that the enrichment went well
    git_cfg = cfg[GIT_BACKEND_SECTION]
    raw_index = cfg['es_collection']['url'] + "/" + git_cfg['raw_index']
    enrich_index = cfg['es_enrichment']['url'] + "/" + git_cfg['enriched_index']

    raw_items = requests.get(
        raw_index + "/_search?size=0").json()['hits']['total']
    enriched_items = requests.get(
        enrich_index + "/_search?size=0").json()['hits']['total']

    # the number of raw items is bigger since the enriched items are
    # generated based on:
    # https://github.com/VizGrimoire/GrimoireLib
    # --filters-raw-prefix data.files.file:grimoirelib_alch data.files.file:README.md
    # see [git] section in tests/test-projects.json
    self.assertGreater(raw_items, enriched_items)
def test_create_config_file(self):
    """Test whether a config file is correctly created"""
    import os

    tmp_path = tempfile.mktemp(prefix='mordred_')
    try:
        config = Config(CONF_FULL)
        config.create_config_file(tmp_path)
        # The generated file must be parseable again as a Config
        copied_config = Config(tmp_path)
        # FIX: the original test built `copied_config` but asserted nothing,
        # so a broken create_config_file could still pass silently.
        self.assertIsNotNone(copied_config.conf)
    finally:
        # FIX: the temporary file used to leak on every test run
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
def test_set_param_not_found(self):
    """Test whether an error is logged if a param does not exist"""
    config = Config(CONF_FULL)

    with self.assertLogs(logger, level='ERROR') as cm:
        config.set_param("twitter", "acme", "true")

    expected = 'ERROR:sirmordred.config:Config section twitter and param acme not exists'
    self.assertEqual(cm.output[-1], expected)
def test_set_param(self):
    """Test whether a param is correctly modified"""
    cfg = Config(CONF_FULL)
    # The full sample config starts with collection disabled
    self.assertFalse(cfg.conf['twitter']['collect'])
    cfg.set_param("twitter", "collect", "true")
    self.assertTrue(cfg.conf['twitter']['collect'])
def test__get_projects_from_url(self):
    """Test downloading projects from an URL """
    setup_http_server()

    cfg = Config(CONF_FILE)
    cfg.set_param('projects', 'projects_url', 'http://localhost/projects.json')

    task = TaskProjects(cfg)
    self.assertEqual(task.execute(), None)
    self.assertTrue(URL_PROJECTS_MAIN in task.get_projects())
def test_get_data_sources(self):
    """Test whether all data sources are properly retrieved"""
    config = Config(CONF_FULL)
    expected = ['askbot', 'bugzilla', 'bugzillarest', 'confluence',
                'discourse', 'dockerhub', 'functest', 'gerrit', 'git',
                'gitlab', 'github', 'google_hits', 'groupsio', 'hyperkitty',
                'jenkins', 'jira', 'mbox', 'meetup', 'mediawiki',
                'mozillaclub', 'nntp', 'phabricator', 'pipermail',
                'puppetforge', 'redmine', 'remo', 'rss', 'stackexchange',
                'slack', 'supybot', 'telegram', 'twitter']

    data_sources = config.get_data_sources()

    self.assertEqual(len(data_sources), len(expected))
    # FIX: the original compared `data_sources.sort()` with `expected.sort()`.
    # list.sort() sorts in place and returns None, so that assertion compared
    # None == None and could never fail. Compare sorted copies instead.
    self.assertEqual(sorted(data_sources), sorted(expected))
def setUp(self):
    """Reset the SortingHat test database before each test."""
    sh = Config(CONF_FILE).get_conf()['sortinghat']
    self.sh_kwargs = {'user': sh['user'],
                      'password': sh['password'],
                      'database': sh['database'],
                      'host': sh['host'],
                      'port': None}

    # Drop and re-create so every test starts from an empty state
    Database.drop(**self.sh_kwargs)
    Database.create(**self.sh_kwargs)
    self.sh_db = Database(**self.sh_kwargs)
def test_compose_p2o_params(self):
    """Test whether p2o params are built correctly for a backend and a repository"""
    task = Task(Config(CONF_FILE))

    # Single-URL repository: only 'url' is produced
    params = task._compose_p2o_params(
        "stackexchange", "https://stackoverflow.com/questions/tagged/example")
    self.assertDictEqual(
        params, {'url': "https://stackoverflow.com/questions/tagged/example"})

    # Repository string with two URLs: kept verbatim in 'url'
    mediawiki_repo = ("https://wiki-archive.opendaylight.org "
                      "https://wiki-archive.opendaylight.org/view")
    params = task._compose_p2o_params("mediawiki", mediawiki_repo)
    self.assertDictEqual(params, {'url': mediawiki_repo})

    # A trailing --filter-no-collection flag is split out as its own param
    params = task._compose_p2o_params(
        "mediawiki", mediawiki_repo + " --filter-no-collection=true")
    self.assertDictEqual(params, {'url': mediawiki_repo,
                                  "filter-no-collection": "true"})
def micro_mordred(cfg_path, backend_sections, repos_to_check, raw,
                  identities_load, identities_merge, enrich, panels):
    """Execute the Mordred tasks using the configuration file (`cfg_path`).

    :param cfg_path: the path of a Mordred configuration file
    :param backend_sections: the backend sections where the
        raw/enrich/identities phases will be executed
    :param repos_to_check: process a repository only if it is in this list,
        or `None` for all repos
    :param raw: if true, it activates the collection of raw data
    :param identities_load: if true, it activates the identities loading process
    :param identities_merge: if true, it activates the identities merging process
    :param enrich: if true, it activates the enrichment of the raw data
    :param panels: if true, it activates the upload of all panels declared
        in the configuration file
    """
    config = Config(cfg_path)

    if raw:
        for backend in backend_sections:
            get_raw(config, backend, repos_to_check)

    if identities_load:
        get_identities_load(config)

    if identities_merge:
        get_identities_merge(config)

    if enrich:
        for backend in backend_sections:
            get_enrich(config, backend, repos_to_check)

    if panels:
        get_panels(config)
def micro_mordred(cfg_path, backend_sections, raw, arthur, identities_load,
                  identities_merge, enrich, panels):
    """Execute the Mordred tasks using the configuration file (`cfg_path`).

    :param cfg_path: the path of a Mordred configuration file
    :param backend_sections: the backend sections where the
        raw/enrich/identities phases will be executed
    :param raw: if true, it activates the collection of raw data
    :param arthur: if true, it enables Arthur to collect the raw data
    :param identities_load: if true, it activates the identities loading process
    :param identities_merge: if true, it activates the identities merging process
    :param enrich: if true, it activates the enrichment of the raw data
    :param panels: if true, it activates the upload of all panels declared
        in the configuration file
    """
    config = Config(cfg_path)

    if raw:
        for backend in backend_sections:
            get_raw(config, backend, arthur)

    if identities_load:
        get_identities_load(config)

    if identities_merge:
        get_identities_merge(config)

    if enrich:
        for backend in backend_sections:
            get_enrich(config, backend)

    if panels:
        get_panels(config)
def test_backend_params(self):
    """Test whether the backend parameters are initializated"""
    config = Config(CONF_FILE)
    task = TaskRawDataCollection(config,
                                 backend_section=GITHUB_BACKEND_SECTION)

    params = task._compose_perceval_params(GITHUB_BACKEND_SECTION, GITHUB_REPO)
    expected_params = [
        'grimoirelab', 'perceval',
        '--api-token', 'XXXXX',
        '--sleep-time', '300', '--sleep-for-rate',
        '--category', 'issue',
        '--archive-path', '/tmp/test_github_archive'
    ]

    # Same elements, order independent
    self.assertEqual(len(params), len(expected_params))
    for param in params:
        self.assertIn(param, expected_params)
def _get_repos_by_backend(self):
    """Return a dict mapping each enabled backend section to its repositories."""
    output = {}
    projects = TaskProjects.get_projects()

    for project in projects:
        # Keep only the project sections that match a known backend
        # section; the set removes duplicates.
        matching = {
            sect
            for sect in projects[project].keys()
            for known in Config.get_backend_sections()
            if sect and sect.startswith(known)
        }
        for backend_section in sorted(matching):
            repos = projects[project][backend_section]
            if backend_section in output:
                output[backend_section] += repos
            else:
                output[backend_section] = repos

    # A backend may appear in the project/repo file but not be enabled
    # in the sirmordred conf file: keep only the enabled ones.
    enabled = {k: v for k, v in output.items() if k in self.conf}

    # logger.debug('repos to be retrieved: %s ', enabled)
    return enabled
def micro_mordred(cfg_path, backend_sections, raw, arthur, identities, enrich,
                  panels):
    """Execute the raw and/or the enrich phases of a given backend section
    defined in a Mordred configuration file.

    :param cfg_path: the path of a Mordred configuration file
    :param backend_sections: the backend sections where the raw and/or
        enrich phases will be executed
    :param raw: if true, it activates the collection of raw data
    :param arthur: if true, it enables Arthur to collect the raw data
    :param identities: if true, it activates the identities merge in SortingHat
    :param enrich: if true, it activates the collection of enrich data
    :param panels: if true, it activates the upload of panels
    """
    config = Config(cfg_path)

    if raw:
        for backend in backend_sections:
            get_raw(config, backend, arthur)

    if identities:
        get_identities(config)

    if enrich:
        for backend in backend_sections:
            get_enrich(config, backend)

    if panels:
        get_panels(config)
def test_execute_from_archive(self):
    """Test fetching data from archives"""
    # proj_file -> 'test-projects-archive.json' stored within the conf file
    config = Config('archives-test.cfg')
    backend_sections = ['askbot', 'bugzilla', 'bugzillarest', 'confluence',
                        'discourse', 'dockerhub', 'gerrit', 'github:issue',
                        'github:pull', 'gitlab:issue', 'gitlab:merge',
                        'google_hits', 'jenkins', 'jira', 'mediawiki',
                        'meetup', 'mozillaclub', 'nntp', 'phabricator',
                        'redmine', 'remo', 'rss', 'stackexchange', 'slack',
                        'telegram', 'twitter']

    # Projects must be loaded before collecting or enriching
    TaskProjects(config).execute()

    # First collect the raw data from the archives...
    for section in backend_sections:
        TaskRawDataCollection(config, backend_section=section).execute()

    # ...then enrich it
    for section in backend_sections:
        task = TaskEnrich(config, backend_section=section)
        self.assertEqual(task.execute(), None)
def test_compose_p2o_params(self):
    """Test whether p2o params are built correctly for a backend and a repository"""
    task = Task(Config(CONF_FILE))
    repo_url = "https://stackoverflow.com/questions/tagged/example"
    params = task._compose_p2o_params("stackexchange", repo_url)
    self.assertEqual(params, {'url': repo_url})
def test_create_dashboard_multi_ds_kibiter_6(self):
    """ Test the creation of dashboards with filtered data sources """
    config = Config(CONF_FILE)

    es_url = config.conf['es_enrichment']['url']
    es_kibana_url = urljoin(es_url + "/", '.kibana')
    kibiter_api_url = urljoin(config.conf['panels']['kibiter_url'],
                              KIBANA_SETTINGS_URL)

    headers = {"Content-Type": "application/json", "kbn-xsrf": "true"}

    # Mock the ES .kibana index and the two Kibiter settings endpoints
    httpretty.register_uri(httpretty.GET, es_kibana_url,
                           body={}, status=200)
    httpretty.register_uri(httpretty.POST,
                           kibiter_api_url + '/defaultIndex',
                           body={}, status=200,
                           forcing_headers=headers)
    httpretty.register_uri(httpretty.POST,
                           kibiter_api_url + '/timepicker:timeDefaults',
                           body={}, status=200,
                           forcing_headers=headers)

    MockedTaskPanels.VERSION = '6.1.0'
    MockedTaskPanels(config).execute()
def _get_repos_by_backend(self):
    """Return a dict mapping each enabled backend section to its repositories."""
    output = {}
    projects = TaskProjects.get_projects()

    for backend_section in Config.get_backend_sections():
        # The backend name is invariant for the section, so resolve it once
        backend = Task.get_backend(backend_section)
        for project in projects:
            if backend not in projects[project]:
                continue
            repos = projects[project][backend]
            if backend_section in output:
                output[backend_section] += repos
            else:
                output[backend_section] = repos

    # backend could be in project/repo file but not enabled in
    # sirmordred conf file
    enabled = {k: v for k, v in output.items() if k in self.conf}

    # logger.debug('repos to be retrieved: %s ', enabled)
    return enabled
def test_initialization(self):
    """Test whether attributes are initializated"""
    cfg = Config(CONF_FILE)
    self.assertEqual(TaskProjects(cfg).config, cfg)
def test_init_studies(self):
    """Test whether studies' attributes are initializated"""
    config = Config(CONF_SLIM)

    self.assertIsNotNone(config.conf)
    self.assertIsNone(config.raw_conf)
    self.assertEqual(config.conf_list, [CONF_SLIM])
    self.assertEqual(len(config.conf.keys()), 18)

    top_sections = config.conf.keys()
    for section in ['general', 'projects', 'es_collection', 'es_enrichment',
                    'sortinghat', 'panels', 'phases', 'git',
                    'enrich_demography:git', 'enrich_areas_of_code:git',
                    'enrich_onion:git', 'github:issues', 'github:pulls',
                    'enrich_onion:github']:
        self.assertIn(section, top_sections)

    demography_params = config.conf['enrich_demography:git'].keys()
    for param in ['date_field', 'author_field']:
        self.assertIn(param, demography_params)

    areas_of_code_params = config.conf['enrich_areas_of_code:git'].keys()
    for param in ['in_index', 'out_index', 'sort_on_field', 'no_incremental']:
        self.assertIn(param, areas_of_code_params)

    onion_git_params = config.conf['enrich_onion:git'].keys()
    for param in ['in_index', 'out_index', 'data_source', 'contribs_field',
                  'timeframe_field', 'sort_on_field', 'no_incremental']:
        self.assertIn(param, onion_git_params)

    onion_github_params = config.conf['enrich_onion:github'].keys()
    # NOTE(review): the original asserted 'in_index_prs' twice and never
    # checked 'out_index_prs' -- possibly a typo in the original test;
    # the original assertion set is preserved here.
    for param in ['in_index_iss', 'in_index_prs', 'out_index_iss',
                  'data_source_iss', 'data_source_prs', 'contribs_field',
                  'timeframe_field', 'sort_on_field', 'no_incremental']:
        self.assertIn(param, onion_github_params)
def test_create_dashboard_stackexchange(self, mock_get_dashboard_name, mock_import_dashboard):
    """ Test the creation of a dashboard which includes stackexchange in data sources """
    mock_get_dashboard_name.return_value = ''
    task = TaskPanels(Config(CONF_FILE))
    task.create_dashboard(None, data_sources=["stackexchange"])
def test_create_dashboard_multi_ds(self, mock_get_dashboard_name):
    """ Test the creation of dashboards with filtered data sources """
    mock_get_dashboard_name.return_value = ''
    TaskPanels(Config(CONF_FILE)).execute()
def test_get_collection_url(self):
    """Test whether the collection url could be overwritten in a backend"""
    task = Task(Config(CONF_FILE))
    task.backend_section = "stackexchange"
    self.assertEqual(task._get_collection_url(),
                     COLLECTION_URL_STACKEXCHANGE)
def test_contains(self):
    """Test __contains__ lookups, including implicit sub-section matches."""
    config = Config(CONF_FULL)
    config.conf = {
        'backend:param1': {
            'shared_param': 'value 1',
            'unique_to_1': 'value 2',
        },
        'backend:param2': {
            'shared_param': 'value 3',
            'unique_to_2': 'value 4',
        },
        'backend:param1:param2': {
            'param_combo': 'value 5',
        }
    }

    # Sections present verbatim
    for key in ('backend:param1', 'backend:param2', 'backend:param1:param2'):
        self.assertIn(key, config)

    # Sections matched implicitly through their parameter combinations
    for key in ('backend:param2:param1', 'backend:fake:param1',
                'backend:param1:fake'):
        self.assertIn(key, config)

    # No matching section at all
    for key in ('backend', 'backend:fake', 'fake-backend:param1'):
        self.assertNotIn(key, config)

    # Once the bare backend section exists, its variants are contained too
    config.conf['backend'] = {
        'arbitrary_data': 'idc',
    }
    for key in ('backend', 'backend:fake', 'backend:param1'):
        self.assertIn(key, config)

    # Unknown backends are still rejected
    self.assertNotIn('fake-backend:param1', config)
    self.assertNotIn('fake-backend', config)
def test_init(self):
    """Test whether attributes are initializated"""
    cfg = Config(CONF_FULL)
    self.assertIsNotNone(cfg.conf)
    self.assertIsNone(cfg.raw_conf)
    self.assertEqual(cfg.conf_list, [CONF_FULL])
    self.assertEqual(len(cfg.conf.keys()), 47)
def test_execute_no_collection(self):
    """Test whether the raw data is not downloaded when --filter-no-collection is true"""
    config = Config(CONF_FILE_NO_COLL)
    cfg = config.get_conf()
    task = TaskRawDataCollection(config, backend_section=GIT_BACKEND_SECTION)

    # Projects must be loaded before collecting
    TaskProjects(config).execute()
    self.assertIsNotNone(task.execute())

    # Check that --filter-no-collection kept the filtered items out
    raw_index = cfg['es_collection']['url'] + "/" + cfg[GIT_BACKEND_SECTION]['raw_index']
    response = requests.get(raw_index + "/_search?size=0", verify=False)
    self.assertEqual(response.json()['hits']['total'], 40)
def test_execute(self):
    """Test whether the Task could be run"""
    config = Config(CONF_FILE)
    cfg = config.get_conf()
    task = TaskRawDataCollection(config, backend_section=GIT_BACKEND_SECTION)

    # Projects must be loaded before collecting
    TaskProjects(config).execute()
    self.assertIsNotNone(task.execute())

    # Check that the collection went well
    raw_index = cfg['es_collection']['url'] + "/" + cfg[GIT_BACKEND_SECTION]['raw_index']
    response = requests.get(raw_index + "/_search?size=0", verify=False)
    self.assertEqual(response.json()['hits']['total'], 3603)
def test_execute(self):
    """Test whether the Task could be run"""
    config = Config(CONF_FILE)
    task = TaskRawDataCollection(config, backend_section=GIT_BACKEND_SECTION)
    # Projects must be loaded before collecting
    TaskProjects(config).execute()
    self.assertEqual(task.execute(), None)
def test_initialization(self):
    """Test whether attributes are initializated"""
    cfg = Config(CONF_FILE)
    task = TaskEnrich(cfg, backend_section=GIT_BACKEND_SECTION)
    self.assertEqual(task.config, cfg)
    self.assertEqual(task.backend_section, GIT_BACKEND_SECTION)
def test_execute(self):
    """Test whether the Task could be run"""
    # FIX: the original body contained a nested, never-invoked
    # `def setUp(self)` (dead code, apparently pasted in by mistake);
    # it has been removed. The database reset it performed belongs in the
    # class-level setUp, which this file already defines elsewhere.
    config = Config(CONF_FILE)
    cfg = config.get_conf()

    # We need to load the projects
    TaskProjects(config).execute()

    backend_section = GIT_BACKEND_SECTION
    task = TaskEnrich(config, backend_section=backend_section)
    self.assertEqual(task.execute(), None)

    # Check that the enrichment went well
    es_collection = cfg['es_collection']['url']
    es_enrichment = cfg['es_enrichment']['url']
    raw_index = es_collection + "/" + cfg[GIT_BACKEND_SECTION]['raw_index']
    enrich_index = es_enrichment + "/" + cfg[GIT_BACKEND_SECTION]['enriched_index']

    r = requests.get(raw_index + "/_search?size=0", verify=False)
    raw_items = r.json()['hits']['total']
    r = requests.get(enrich_index + "/_search?size=0", verify=False)
    enriched_items = r.json()['hits']['total']

    self.assertEqual(raw_items, enriched_items)