Example #1
class TestBatchList:
    """ Batch list tests group. """
    batch_list = None
    DUMMY_ITEMS = [{"item": 1}, {"item": 2}, {"item": 3}]

    @pytest.fixture(autouse=True)
    def setup_batch_list(self):
        """ Fixture for creating batch list. """
        self.batch_list = BatchList()

    def test_empty(self):
        """ Test for empty batch list. """
        assert self.batch_list.get_total_items() == 0

    def test_insert_few(self):
        """ Test for insertion of a few items. """
        for item in self.DUMMY_ITEMS:
            self.batch_list.add_item(item)

        assert self.batch_list.get_total_items() == len(self.DUMMY_ITEMS)

    def test_insert_full_batch(self):
        """ Test for insertion of multiple items, until new batch needs to be created. """
        max_size = int(BATCH_MAX_SIZE)

        for count in range(0, max_size + 1):
            self.batch_list.add_item({"item": count})

        assert self.batch_list.get_total_items() == max_size + 1
        assert len(self.batch_list.batches) == 2
        assert len(self.batch_list.batches[0]) == max_size
        assert len(self.batch_list.batches[1]) == 1

    def test_clear(self):
        """ Test for clearing the list. """
        for item in self.DUMMY_ITEMS:
            self.batch_list.add_item(item)

        self.batch_list.clear()
        assert len(self.batch_list.batches) == 0
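
The tests above exercise a small surface of BatchList: add_item(), get_total_items(), clear(), the batches attribute, and a BATCH_MAX_SIZE limit that forces a new batch once the current one is full. A minimal sketch of a class with that behaviour, assuming the real BatchList (and the actual value of BATCH_MAX_SIZE) live elsewhere in the project and may differ in detail:

BATCH_MAX_SIZE = 500  # assumed value; the real project is expected to configure this elsewhere

class BatchList:
    """ Minimal sketch: splits added items into batches of at most BATCH_MAX_SIZE items. """

    def __init__(self):
        self.batches = []

    def add_item(self, item):
        # Open a new batch when there is none yet or the last one is full.
        if not self.batches or len(self.batches[-1]) >= int(BATCH_MAX_SIZE):
            self.batches.append([])
        self.batches[-1].append(item)

    def get_total_items(self):
        return sum(len(batch) for batch in self.batches)

    def clear(self):
        self.batches = []

    def __iter__(self):
        # Examples #2 and #3 iterate a BatchList directly, batch by batch.
        return iter(self.batches)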
Example #2
    def store(self):  # pylint: disable=too-many-branches,too-many-statements
        """Sync all OVAL feeds. Process files in batches due to disk space and memory usage."""
        self.logger.info("Checking OVAL feed.")
        failed = self._download_feed()
        if failed:
            for path in failed:
                FAILED_IMPORT_OVAL.inc()
                self.logger.warning("OVAL feed failed to download, %s (HTTP CODE %d).", path, failed[path])
            self.clean()
            return

        db_oval_files = self.oval_store.oval_file_map.copy()
        batches = BatchList()
        up_to_date = 0

        # Skip OVAL definition files that have not been updated
        with open(self.feed_path, 'r', encoding='utf8') as feed_file:
            feed = json.load(feed_file)
        feed_oval_files = {entry["id"]: entry for entry in feed["feed"]["entry"]}
        for entry in feed_oval_files.values():
            if self._skip_oval_definition_file(entry['id'], feed_oval_files):
                continue
            db_timestamp = db_oval_files.get(entry['id'])
            feed_timestamp = parse_datetime(entry["updated"])
            if not db_timestamp or feed_timestamp > db_timestamp[1]:
                local_path = os.path.join(self.tmp_directory, entry["content"]["src"].replace(OVAL_FEED_BASE_URL, ""))
                oval_definitions_file = OvalDefinitions(entry["id"], feed_timestamp,
                                                        entry["content"]["src"], local_path)
                batches.add_item(oval_definitions_file)
            else:
                up_to_date += 1
            db_oval_files.pop(entry["id"], None)
        feed_updated = parse_datetime(feed["feed"]["updated"])

        self.logger.info("%d OVAL definition files are up to date.", up_to_date)
        total_oval_files = batches.get_total_items()
        completed_oval_files = 0
        self.logger.info("%d OVAL definition files need to be synced.", total_oval_files)
        self.logger.info("%d OVAL definition files need to be deleted.", len(db_oval_files))

        try:
            for batch in batches:
                self.logger.info("Syncing a batch of %d OVAL definition files", len(batch))
                failed = self._download_definitions(batch)
                if failed:
                    self.logger.warning("%d OVAL definition files failed to download.", len(failed))
                    batch = [oval_file for oval_file in batch if oval_file.local_path not in failed]
                self._unpack_definitions(batch)
                for oval_definitions_file in batch:
                    completed_oval_files += 1
                    try:
                        oval_definitions_file.load_metadata()
                        self.logger.info("Syncing OVAL definition file: %s [%s/%s]", oval_definitions_file.oval_id,
                                         completed_oval_files, total_oval_files)
                        self.oval_store.store(oval_definitions_file)
                    finally:
                        oval_definitions_file.unload_metadata()
            self.delete_oval_file(list(db_oval_files))
            # Timestamp of main feed file
            self.oval_store.save_lastmodified(feed_updated)
        finally:
            self.clean()
Example #3
    def store(self):  # pylint: disable=too-many-branches,too-many-statements
        """Sync all queued repositories. Process repositories in batches due to disk space and memory usage."""
        self.logger.info("Checking %d repositories.", len(self.repositories))

        self._write_certificate_cache()

        # Download all repomd files first
        failed = self._download_repomds()
        if failed:
            FAILED_REPOMD.inc(len(failed))
            failed_repos = [
                repo for repo in sorted(self.repositories,
                                        key=attrgetter("repo_url"))
                if self._repo_download_failed(repo, failed)
            ]
            self.logger.warning("%d repomd.xml files failed to download.",
                                len(failed))
            self.clean_repodata(failed_repos)

        self._read_repomds()
        # Filter out all repositories without the repomd attribute set (the downloaded repomd is not newer)
        batches = BatchList()
        up_to_date = []

        def md_size(repomd, data_type):
            try:
                mdata = repomd.get_metadata(data_type)
                # open-size is not present for uncompressed files
                return int(mdata.get('size', 0)) + int(
                    mdata.get('open-size', '0'))
            except RepoMDTypeNotFound:
                return 0

        for repository in sorted(self.repositories,
                                 key=attrgetter("repo_url")):
            if repository.repomd:
                repo_size = md_size(repository.repomd, 'primary_db')
                # If we use primary_db, we don't even download primary data xml
                if repo_size == 0:
                    repo_size += md_size(repository.repomd, 'primary')

                repo_size += md_size(repository.repomd, 'updateinfo')
                repo_size += md_size(repository.repomd, 'modules')

                batches.add_item(repository, repo_size)
            else:
                up_to_date.append(repository)

        self.clean_repodata(up_to_date)
        self.logger.info("%d repositories are up to date.", len(up_to_date))
        total_repositories = batches.get_total_items()
        completed_repositories = 0
        self.logger.info("%d repositories need to be synced.",
                         total_repositories)

        # Download and process repositories in batches (unpacked metadata files can consume a lot of disk space)
        try:  # pylint: disable=too-many-nested-blocks
            for batch in batches:
                self.logger.info("Syncing a batch of %d repositories",
                                 len(batch))
                try:
                    failed = self._download_metadata(batch)
                    if failed:
                        self.logger.warning(
                            "%d metadata files failed to download.",
                            len(failed))
                        failed_repos = [
                            repo for repo in batch
                            if self._repo_download_failed(repo, failed)
                        ]
                        self.clean_repodata(failed_repos)
                        batch = [
                            repo for repo in batch if repo not in failed_repos
                        ]
                    self._unpack_metadata(batch)
                    for repository in batch:
                        completed_repositories += 1
                        try:
                            repository.load_metadata()
                            self.logger.info(
                                "Syncing repository: %s [%s/%s]", ", ".join(
                                    filter(None, (repository.content_set,
                                                  repository.basearch,
                                                  repository.releasever))),
                                completed_repositories, total_repositories)
                            self.repo_store.store(repository)
                        except Exception:  # pylint: disable=broad-except
                            self.logger.warning(
                                "Syncing repository failed: %s [%s/%s]",
                                ", ".join(
                                    filter(None, (repository.content_set,
                                                  repository.basearch,
                                                  repository.releasever))),
                                completed_repositories, total_repositories)
                            self.logger.exception("Exception: ")
                            FAILED_IMPORT_REPO.inc()
                        finally:
                            repository.unload_metadata()
                finally:
                    self.clean_repodata(batch)
        finally:
            self.repo_store.cleanup_unused_data()
            self._clean_certificate_cache()
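
Unlike the OVAL sync, the repository sync above passes a second argument to add_item(): an estimated on-disk size of the repository metadata computed by md_size(). Presumably the batches are therefore also capped by accumulated size, which matches the comment about unpacked metadata consuming a lot of disk space. A hedged sketch of how such a size-aware add_item() might extend the BatchList sketched after Example #1; BATCH_MAX_FILESIZE is a hypothetical name, and self.last_batch_size would start at 0 in __init__:

BATCH_MAX_FILESIZE = 5 * 2**30  # assumed cap on the accumulated size of one batch, in bytes

    def add_item(self, item, item_size=0):
        """ Sketch: open a new batch when the current one is full by item count or by size. """
        if (not self.batches
                or len(self.batches[-1]) >= int(BATCH_MAX_SIZE)
                or self.last_batch_size + item_size > BATCH_MAX_FILESIZE):
            self.batches.append([])
            self.last_batch_size = 0
        self.batches[-1].append(item)
        self.last_batch_size += item_size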