def test_execute_from_archive(self):
    """Check that collecting from archives reports well-formed errors"""

    # The conf file points (proj_file) to 'test-projects-archive.json'
    config = Config(CONF_ARCHIVE_FILE)

    backend_sections = [
        'askbot', 'bugzilla', 'bugzillarest', 'confluence', 'discourse',
        'dockerhub', 'gerrit', 'github:issue', 'github:pull',
        'gitlab:issue', 'gitlab:merge', 'google_hits', 'jenkins', 'jira',
        'mediawiki', 'meetup', 'mozillaclub', 'nntp', 'phabricator',
        'redmine', 'remo', 'rss', 'stackexchange', 'slack', 'telegram',
        'twitter'
    ]
    # Every reported error is expected to carry these keys
    expected_keys = ('backend', 'repo', 'error')

    # The projects are loaded once, before any collection task runs
    TaskProjects(config).execute()

    for section in backend_sections:
        collector = TaskRawDataCollection(config, backend_section=section)
        for reported in collector.execute():
            for key in expected_keys:
                self.assertIn(key, reported)
def test_backend_params(self):
    """Test whether the backend parameters are initialized"""

    config = Config(CONF_FILE)
    backend_section = GITHUB_BACKEND_SECTION
    task = TaskRawDataCollection(config, backend_section=backend_section)
    params = task._compose_perceval_params(GITHUB_BACKEND_SECTION, GITHUB_REPO)

    expected_params = [
        'grimoirelab',
        'perceval',
        '--api-token',
        'XXXXX',
        '--sleep-time',
        '300',
        '--sleep-for-rate',
        '--category',
        'issue',
        '--archive-path',
        '/tmp/test_github_archive'
    ]

    # The order of the composed parameters is not checked: only that the
    # number of parameters matches and each one is an expected value.
    self.assertEqual(len(params), len(expected_params))

    for p in params:
        # assertIn reports the offending parameter on failure, whereas
        # assertTrue(p in ...) only reports "False is not true".
        self.assertIn(p, expected_params)
def test_execute(self):
    """Test whether the Task could be run"""

    config = Config(CONF_FILE)
    backend_section = GIT_BACKEND_SECTION
    task = TaskRawDataCollection(config, backend_section=backend_section)

    # We need to load the projects
    TaskProjects(config).execute()

    # Identity check: assertEqual(x, None) relies on `==`, which a custom
    # __eq__ on the returned object could satisfy without it being None.
    self.assertIsNone(task.execute())
def get_raw(config, backend_section):
    """Run the raw data collection phase for one backend section

    The projects are loaded before the collection task is executed. Any
    exception raised by the collection is logged as an error and the
    process exits with status -1.

    :param config: a Mordred config object
    :param backend_section: the backend section where the raw phase is executed
    """
    collector = TaskRawDataCollection(config, backend_section=backend_section)

    # Load the projects prior to running the collection
    TaskProjects(config).execute()

    try:
        collector.execute()
        logging.info("Loading raw data finished!")
    except Exception as exc:
        logging.error(str(exc))
        sys.exit(-1)
def get_raw(config, backend_section, repos_to_check=None):
    """Run the raw data collection phase for one backend section

    Repos are only checked if they are in BOTH `repos_to_check` and the
    `projects.json`. The projects are loaded before the collection task is
    executed. Any exception raised by the collection is logged as an error
    and the process exits with status -1.

    :param config: a Mordred config object
    :param backend_section: the backend section where the raw phase is executed
    :param repos_to_check: a list of repo URLs to check, or None to check
        all repos
    """
    collector = TaskRawDataCollection(
        config,
        backend_section=backend_section,
        allowed_repos=repos_to_check,
    )

    # Load the projects prior to running the collection
    TaskProjects(config).execute()

    try:
        collector.execute()
        logging.info("Loading raw data finished!")
    except Exception as exc:
        logging.error(str(exc))
        sys.exit(-1)
def test_initialization(self):
    """Check that the task keeps the config and backend section it was given"""

    config = Config(CONF_FILE)
    collector = TaskRawDataCollection(config, backend_section=GIT_BACKEND_SECTION)

    self.assertEqual(collector.config, config)
    self.assertEqual(collector.backend_section, GIT_BACKEND_SECTION)
def test_execute(self):
    """Check that the task runs and the raw index ends up populated"""

    config = Config(CONF_FILE)
    conf = config.get_conf()
    collector = TaskRawDataCollection(config, backend_section=GIT_BACKEND_SECTION)

    # The projects have to be loaded before the collection runs
    TaskProjects(config).execute()
    self.assertIsNotNone(collector.execute())

    # Ask the raw index for its total number of items (no documents returned)
    es_url = conf['es_collection']['url']
    index_url = es_url + "/" + conf[GIT_BACKEND_SECTION]['raw_index']
    response = requests.get(index_url + "/_search?size=0", verify=False)

    self.assertEqual(response.json()['hits']['total'], 3603)
def test_execute_no_collection(self):
    """Check that raw data is not downloaded when --filter-no-collection is true"""

    config = Config(CONF_FILE_NO_COLL)
    conf = config.get_conf()
    collector = TaskRawDataCollection(config, backend_section=GIT_BACKEND_SECTION)

    # The projects have to be loaded before the collection runs
    TaskProjects(config).execute()
    self.assertIsNotNone(collector.execute())

    # Verify the --filter-no-collection filter through the raw index item count
    es_url = conf['es_collection']['url']
    index_url = es_url + "/" + conf[GIT_BACKEND_SECTION]['raw_index']
    response = requests.get(index_url + "/_search?size=0", verify=False)

    self.assertEqual(response.json()['hits']['total'], 40)
def test_execute_from_archive(self):
    """Test fetching data from archives"""

    # proj_file -> 'test-projects-archive.json' stored within the conf file
    conf_file = 'archives-test.cfg'
    config = Config(conf_file)

    backend_sections = [
        'askbot', 'bugzilla', 'bugzillarest', 'confluence', 'discourse',
        'dockerhub', 'gerrit', 'github', 'jenkins', 'jira', 'mediawiki',
        'meetup', 'mozillaclub', 'nntp', 'phabricator', 'redmine', 'remo',
        'rss', 'stackexchange', 'slack', 'telegram', 'twitter'
    ]

    for backend_section in backend_sections:
        task = TaskRawDataCollection(config, backend_section=backend_section)

        # We need to load the projects
        TaskProjects(config).execute()

        # Identity check against None; the backend section is passed as the
        # failure message so a failing iteration can be told apart.
        self.assertIsNone(task.execute(), msg=backend_section)
def get_raw(config, backend_section, arthur):
    """Run the raw data collection phase for one backend section

    The collection task is the Arthur-based one when `arthur` is truthy and
    the regular one otherwise. The projects are loaded before the task is
    executed.

    :param config: a Mordred config object
    :param backend_section: the backend section where the raw phase is executed
    :param arthur: if true, it enables Arthur to collect the raw data
    """
    task_class = TaskRawDataArthurCollection if arthur else TaskRawDataCollection
    collector = task_class(config, backend_section=backend_section)

    # Load the projects prior to running the collection
    TaskProjects(config).execute()

    collector.execute()
    logging.info("Loading raw data finished!")