class TestBatchList: """TestBatchList class. Test creating list of lists""" @pytest.fixture() def batchlist(self): """Setup for batchlist testing.""" self.blist = BatchList() def test_empty_batch(self, batchlist): """Test empty batchlist.""" assert not self.blist.batches # Assuming default is 50, 102 = 3 batches, 50/50/2 ; 150 = 50/50/50; 157 == 4, 50/50/50/7 # move thru the batches, making sure each other than the last is at most BATCH_MAX_SIZE long # and each batch has cumulative file_size less than BATCH_MAX_FILESIZE @pytest.mark.parametrize("list_size", [102, 150, 157]) @pytest.mark.parametrize("item_filesize", ITEM_FILESIZES) def test_batch_creation(self, batchlist, list_size, item_filesize): """Test creation of batch list.""" for i in range(list_size): self.blist.add_item(i, item_filesize) # batch size is variable, if items are too large, the batch might contain less than BATCH_MAX_SIZE items batch_size = min(int(BATCH_MAX_SIZE), int(BATCH_MAX_FILESIZE) // item_filesize) total_batches = math.ceil(list_size / batch_size) last_batch_size = list_size % batch_size assert len(self.blist.batches) == total_batches for curr_batch in range(total_batches): if curr_batch == (total_batches - 1) and last_batch_size > 0: expected_num_in_batch = last_batch_size else: expected_num_in_batch = batch_size assert len(self.blist.batches[curr_batch]) == expected_num_in_batch def test_invalid_batch_size(self, batchlist): """ Test creation of an invalid batch list. Should fail because single item is larger than max batch size """ with pytest.raises(AssertionError): self.test_batch_creation(batchlist, 102, int(BATCH_MAX_FILESIZE) + 1)
class TestBatchList: """ Batch list tests group. """ batch_list = None DUMMY_ITEMS = [{"item": 1}, {"item": 2}, {"item": 3}] @pytest.fixture(autouse=True) def setup_batch_list(self): """ Fixture for creating batch list. """ self.batch_list = BatchList() def test_empty(self): """ Test for empty batch list. """ assert self.batch_list.get_total_items() == 0 def test_insert_few(self): """ Test for inserton of one item. """ for item in self.DUMMY_ITEMS: self.batch_list.add_item(item) assert self.batch_list.get_total_items() == len(self.DUMMY_ITEMS) def test_insert_full_batch(self): """ Test for insertion of multiple items, until new batch needs to be created. """ max_size = int(BATCH_MAX_SIZE) for count in range(0, max_size + 1): self.batch_list.add_item({"item": count}) assert self.batch_list.get_total_items() == max_size + 1 assert len(self.batch_list.batches) == 2 assert len(self.batch_list.batches[0]) == max_size assert len(self.batch_list.batches[1]) == 1 def test_clear(self): """ Test for clearing the list. """ for item in self.DUMMY_ITEMS: self.batch_list.add_item(item) self.batch_list.clear() assert len(self.batch_list.batches) == 0
def store(self):  # pylint: disable=too-many-branches,too-many-statements
    """Sync all OVAL feeds. Process files in batches due to disk space and memory usage."""
    self.logger.info("Checking OVAL feed.")
    failed = self._download_feed()
    if failed:
        for path in failed:
            FAILED_IMPORT_OVAL.inc()
            self.logger.warning("OVAL feed failed to download, %s (HTTP CODE %d).", path, failed[path])
        self.clean()
        return

    db_oval_files = self.oval_store.oval_file_map.copy()
    batches = BatchList()
    up_to_date = 0
    # Filter out all not updated OVAL definition files
    with open(self.feed_path, 'r', encoding='utf8') as feed_file:
        feed = json.load(feed_file)
    feed_oval_files = {entry["id"]: entry for entry in feed["feed"]["entry"]}
    for entry in feed_oval_files.values():
        if self._skip_oval_definition_file(entry['id'], feed_oval_files):
            continue
        db_timestamp = db_oval_files.get(entry['id'])
        feed_timestamp = parse_datetime(entry["updated"])
        if not db_timestamp or feed_timestamp > db_timestamp[1]:
            local_path = os.path.join(self.tmp_directory,
                                      entry["content"]["src"].replace(OVAL_FEED_BASE_URL, ""))
            oval_definitions_file = OvalDefinitions(entry["id"], feed_timestamp,
                                                    entry["content"]["src"], local_path)
            batches.add_item(oval_definitions_file)
        else:
            up_to_date += 1
        db_oval_files.pop(entry["id"], None)
    feed_updated = parse_datetime(feed["feed"]["updated"])

    self.logger.info("%d OVAL definition files are up to date.", up_to_date)
    total_oval_files = batches.get_total_items()
    completed_oval_files = 0
    self.logger.info("%d OVAL definition files need to be synced.", total_oval_files)
    self.logger.info("%d OVAL definition files need to be deleted.", len(db_oval_files))
    try:
        for batch in batches:
            self.logger.info("Syncing a batch of %d OVAL definition files", len(batch))
            failed = self._download_definitions(batch)
            if failed:
                self.logger.warning("%d OVAL definition files failed to download.", len(failed))
                batch = [oval_file for oval_file in batch if oval_file.local_path not in failed]
            self._unpack_definitions(batch)
            for oval_definitions_file in batch:
                completed_oval_files += 1
                try:
                    oval_definitions_file.load_metadata()
                    self.logger.info("Syncing OVAL definition file: %s [%s/%s]",
                                     oval_definitions_file.oval_id,
                                     completed_oval_files, total_oval_files)
                    self.oval_store.store(oval_definitions_file)
                finally:
                    oval_definitions_file.unload_metadata()
        self.delete_oval_file(list(db_oval_files))
        # Timestamp of main feed file
        self.oval_store.save_lastmodified(feed_updated)
    finally:
        self.clean()
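# store() above relies on an OvalDefinitions container that is not shown in
# this section. The sketch below is a hedged reading of the interface the loop
# uses (constructor arguments, local_path, oval_id, load_metadata(),
# unload_metadata()); the real class also parses the downloaded XML, which is
# elided here, and the attribute names beyond those used above are assumptions.
class OvalDefinitionsSketch:
    """Hypothetical OVAL definitions file handle used by the sync loop."""

    def __init__(self, oval_id, updated, url, local_path):
        self.oval_id = oval_id        # feed entry id, keys oval_file_map
        self.updated = updated        # feed timestamp, compared with the DB one
        self.url = url                # remote location under OVAL_FEED_BASE_URL
        self.local_path = local_path  # download target in self.tmp_directory
        self.definitions = None

    def load_metadata(self):
        """Parse the unpacked file into memory (real parsing elided)."""
        self.definitions = {}  # placeholder for the parsed OVAL content

    def unload_metadata(self):
        """Free the parsed content so batches keep memory usage bounded."""
        self.definitions = None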
def store(self):  # pylint: disable=too-many-branches,too-many-statements
    """Sync all queued repositories. Process repositories in batches due to disk space and memory usage."""
    self.logger.info("Checking %d repositories.", len(self.repositories))
    self._write_certificate_cache()
    # Download all repomd files first
    failed = self._download_repomds()
    if failed:
        FAILED_REPOMD.inc(len(failed))
        failed_repos = [repo for repo in sorted(self.repositories, key=attrgetter("repo_url"))
                        if self._repo_download_failed(repo, failed)]
        self.logger.warning("%d repomd.xml files failed to download.", len(failed))
        self.clean_repodata(failed_repos)
    self._read_repomds()
    # Filter all repositories without repomd attribute set (downloaded repomd is not newer)
    batches = BatchList()
    up_to_date = []

    def md_size(repomd, data_type):
        try:
            mdata = repomd.get_metadata(data_type)
            # open-size is not present for uncompressed files
            return int(mdata.get('size', 0)) + int(mdata.get('open-size', '0'))
        except RepoMDTypeNotFound:
            return 0

    for repository in sorted(self.repositories, key=attrgetter("repo_url")):
        if repository.repomd:
            repo_size = md_size(repository.repomd, 'primary_db')
            # If we use primary_db, we don't even download primary data xml
            if repo_size == 0:
                repo_size += md_size(repository.repomd, 'primary')
            repo_size += md_size(repository.repomd, 'updateinfo')
            repo_size += md_size(repository.repomd, 'modules')
            batches.add_item(repository, repo_size)
        else:
            up_to_date.append(repository)
    self.clean_repodata(up_to_date)
    self.logger.info("%d repositories are up to date.", len(up_to_date))

    total_repositories = batches.get_total_items()
    completed_repositories = 0
    self.logger.info("%d repositories need to be synced.", total_repositories)

    # Download and process repositories in batches (unpacked metadata files can consume a lot of disk space)
    try:
        # pylint: disable=too-many-nested-blocks
        for batch in batches:
            self.logger.info("Syncing a batch of %d repositories", len(batch))
            try:
                failed = self._download_metadata(batch)
                if failed:
                    self.logger.warning("%d metadata files failed to download.", len(failed))
                    failed_repos = [repo for repo in batch if self._repo_download_failed(repo, failed)]
                    self.clean_repodata(failed_repos)
                    batch = [repo for repo in batch if repo not in failed_repos]
                self._unpack_metadata(batch)
                for repository in batch:
                    completed_repositories += 1
                    try:
                        repository.load_metadata()
                        self.logger.info("Syncing repository: %s [%s/%s]",
                                         ", ".join(filter(None, (repository.content_set,
                                                                 repository.basearch,
                                                                 repository.releasever))),
                                         completed_repositories, total_repositories)
                        self.repo_store.store(repository)
                    except Exception:  # pylint: disable=broad-except
                        self.logger.warning("Syncing repository failed: %s [%s/%s]",
                                            ", ".join(filter(None, (repository.content_set,
                                                                    repository.basearch,
                                                                    repository.releasever))),
                                            completed_repositories, total_repositories)
                        self.logger.exception("Exception: ")
                        FAILED_IMPORT_REPO.inc()
                    finally:
                        repository.unload_metadata()
            finally:
                self.clean_repodata(batch)
    finally:
        self.repo_store.cleanup_unused_data()
        self._clean_certificate_cache()
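# Worked example of the md_size() accounting above, with hypothetical repomd
# entries: compressed files report both 'size' and 'open-size', uncompressed
# ones only 'size', and a missing metadata type raises RepoMDTypeNotFound.
#
#   primary_db  -> {'size': '1000'}                     => 1000 (nonzero, so 'primary' is skipped)
#   updateinfo  -> {'size': '200', 'open-size': '800'}  => 1000
#   modules     -> not present                          => 0
#
# repo_size == 2000 is then passed to batches.add_item(), so each batch stays
# under BATCH_MAX_FILESIZE of packed plus unpacked metadata on disk.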