def test_execute_from_archive(self):
        """Check that collection and enrichment can run from archives

        Projects are loaded first. The raw collection task is then executed
        for every backend section, and only after all of them have run is
        the enrichment task executed per section; each enrichment is
        expected to return None.
        """

        # The projects file ('test-projects-archive.json') is referenced
        # from within this configuration file
        config = Config('archives-test.cfg')

        sections = (
            'askbot', 'bugzilla', 'bugzillarest', 'confluence', 'discourse',
            'dockerhub', 'gerrit', 'github:issue', 'github:pull',
            'gitlab:issue', 'gitlab:merge', 'google_hits', 'jenkins', 'jira',
            'mediawiki', 'meetup', 'mozillaclub', 'nntp', 'phabricator',
            'redmine', 'remo', 'rss', 'stackexchange', 'slack', 'telegram',
            'twitter',
        )

        # Projects have to be loaded before any task is executed
        TaskProjects(config).execute()

        # First pass: raw collection for every section
        for section in sections:
            TaskRawDataCollection(config, backend_section=section).execute()

        # Second pass: enrichment for every section
        for section in sections:
            enriched = TaskEnrich(config, backend_section=section).execute()
            self.assertEqual(enriched, None)
Exemplo n.º 2
0
def get_raw(config, backend_section):
    """Execute the raw phase for a given backend section

    The projects are loaded before the collection task is executed. If the
    collection raises, the error is logged together with its traceback and
    the process exits with status -1.

    :param config: a Mordred config object
    :param backend_section: the backend section where the raw phase is executed
    """

    task = TaskRawDataCollection(config, backend_section=backend_section)
    TaskProjects(config).execute()
    try:
        task.execute()
    except Exception as e:
        # Top-level boundary: log the traceback as well as the message,
        # because the process terminates right after and the message alone
        # does not say where the collection failed.
        logging.exception(str(e))
        sys.exit(-1)
    else:
        logging.info("Loading raw data finished!")
Exemplo n.º 3
0
def get_raw(config, backend_section, repos_to_check=None):
    """Execute the raw phase for a given backend section

    Repos are only checked if they are in BOTH `repos_to_check` and the `projects.json`

    The projects are loaded before the collection task is executed. If the
    collection raises, the error is logged together with its traceback and
    the process exits with status -1.

    :param config: a Mordred config object
    :param backend_section: the backend section where the raw phase is executed
    :param repos_to_check: A list of repo URLs to check, or None to check all repos
    """

    task = TaskRawDataCollection(
        config,
        backend_section=backend_section,
        allowed_repos=repos_to_check,
    )
    TaskProjects(config).execute()
    try:
        task.execute()
    except Exception as e:
        # Top-level boundary: log the traceback as well as the message,
        # because the process terminates right after and the message alone
        # does not say where the collection failed.
        logging.exception(str(e))
        sys.exit(-1)
    else:
        logging.info("Loading raw data finished!")
    def test_execute(self):
        """Check that the raw collection task runs and returns None"""

        conf = Config(CONF_FILE)
        collection = TaskRawDataCollection(conf,
                                           backend_section=GIT_BACKEND_SECTION)

        # Projects have to be loaded before the task is executed
        TaskProjects(conf).execute()

        outcome = collection.execute()
        self.assertEqual(outcome, None)
    def test_execute(self):
        """Check that the Task runs and fills the raw index as expected"""

        conf = Config(CONF_FILE)
        settings = conf.get_conf()
        collection = TaskRawDataCollection(conf,
                                           backend_section=GIT_BACKEND_SECTION)

        # Projects have to be loaded before the task is executed
        TaskProjects(conf).execute()
        self.assertIsNotNone(collection.execute())

        # Verify the collection by counting the items in the raw index
        es_url = settings['es_collection']['url']
        index_name = settings[GIT_BACKEND_SECTION]['raw_index']
        index_url = es_url + "/" + index_name

        # size=0: only the hit count is needed, not the documents
        response = requests.get(index_url + "/_search?size=0", verify=False)
        hits = response.json()['hits']
        self.assertEqual(hits['total'], 3603)
    def test_execute_no_collection(self):
        """Check that raw data is not downloaded when --filter-no-collection is true"""

        conf = Config(CONF_FILE_NO_COLL)
        settings = conf.get_conf()
        collection = TaskRawDataCollection(conf,
                                           backend_section=GIT_BACKEND_SECTION)

        # Projects have to be loaded before the task is executed
        TaskProjects(conf).execute()
        self.assertIsNotNone(collection.execute())

        # Verify that the --filter-no-collection filter works by counting
        # the items in the raw index
        es_url = settings['es_collection']['url']
        index_name = settings[GIT_BACKEND_SECTION]['raw_index']
        index_url = es_url + "/" + index_name

        # size=0: only the hit count is needed, not the documents
        response = requests.get(index_url + "/_search?size=0", verify=False)
        hits = response.json()['hits']
        self.assertEqual(hits['total'], 40)