def test_repository_tag_history(
    namespace, name, expected_tag_count, has_expired, registry_model, with_size_fallback
):
    """
    Lists a repository's tag history under a fixed query budget and checks the
    number of entries returned; optionally checks that "latest" is reported as
    having an expired tag.
    """
    # Warm the media type lookup up front so it does not count against the budget.
    Manifest.media_type.get_name(1)

    # Null out the stored sizes on every manifest row when exercising the fallback.
    if with_size_fallback:
        Manifest.update(layers_compressed_size=None).execute()

    repo_ref = registry_model.lookup_repository(namespace, name)
    query_budget = 2 if with_size_fallback else 1

    with assert_query_count(query_budget):
        entries, more_available = registry_model.list_repository_tag_history(repo_ref)
        assert not more_available
        assert len(entries) == expected_tag_count

        for entry in entries:
            # Touching the manifest and its size must stay within the query budget.
            entry.manifest
            entry.manifest_layers_size

    if not has_expired:
        return

    # There is an expired tag, so "latest" must be reported as such in one query.
    with assert_query_count(1):
        assert registry_model.has_expired_tag(repo_ref, "latest")
def populate_manifest(repository, manifest, legacy_image, storage_ids):
    """
    Writes the rows for a manifest: the manifest row itself, its legacy image
    link, and one blob row per entry in `storage_ids`.

    If a manifest with the same repository and digest already exists (found up
    front, or signalled by an IntegrityError on insert), that existing row is
    returned and no further rows are written.
    """
    media_type_id = Manifest.media_type.get_id(manifest.media_type)

    # Fast path: the manifest has already been written.
    try:
        return Manifest.get(repository=repository, digest=manifest.digest)
    except Manifest.DoesNotExist:
        pass

    with db_transaction():
        try:
            created = Manifest.create(
                digest=manifest.digest,
                repository=repository,
                manifest_bytes=manifest.bytes.as_encoded_str(),
                media_type=media_type_id,
            )
        except IntegrityError as ie:
            logger.debug("Got integrity error when trying to write manifest: %s", ie)
            return Manifest.get(repository=repository, digest=manifest.digest)

        ManifestLegacyImage.create(manifest=created, repository=repository, image=legacy_image)

        blob_rows = [
            {"manifest": created, "repository": repository, "blob": storage_id}
            for storage_id in storage_ids
        ]
        # Skip the bulk insert entirely when there is nothing to insert.
        if blob_rows:
            ManifestBlob.insert_many(blob_rows).execute()

        return created
def test_perform_indexing_needs_reindexing_skip_unsupported(initialized_db, set_secscan_config):
    """
    Manifests whose status is MANIFEST_UNSUPPORTED must keep their old indexer
    hash after an indexing run, even though the indexer reports a new state and
    their last_indexed time is older than the reindex threshold.
    """
    secscan = V4SecurityScanner(app, instance_keys, storage)
    api = secscan._secscan_api = mock.Mock()
    api.state.return_value = {"state": "new hash"}
    api.index.return_value = (
        {"err": None, "state": IndexReportState.Index_Finished},
        "new hash",
    )

    for manifest in Manifest.select():
        # Push last_indexed 60 seconds beyond the configured reindex threshold.
        age = timedelta(seconds=app.config["SECURITY_SCANNER_V4_REINDEX_THRESHOLD"] + 60)
        ManifestSecurityStatus.create(
            manifest=manifest,
            repository=manifest.repository,
            error_json={},
            index_status=IndexStatus.MANIFEST_UNSUPPORTED,
            indexer_hash="old hash",
            indexer_version=IndexerVersion.V4,
            last_indexed=datetime.utcnow() - age,
            metadata_json={},
        )

    secscan.perform_indexing()

    # Unsupported manifests are not rescanned, so every row keeps the old hash.
    assert ManifestSecurityStatus.select().count() == Manifest.select().count()
    for status_row in ManifestSecurityStatus.select():
        assert status_row.indexer_hash == "old hash"
def test_perform_indexing_failed(initialized_db, set_secscan_config):
    """
    Manifests with a FAILED status older than the reindex threshold must end up
    COMPLETED after an indexing run against a healthy (mocked) indexer.
    """
    secscan = V4SecurityScanner(app, instance_keys, storage)
    api = secscan._secscan_api = mock.Mock()
    api.state.return_value = {"state": "abc"}
    api.index.return_value = (
        {"err": None, "state": IndexReportState.Index_Finished},
        "abc",
    )

    for manifest in Manifest.select():
        # Push last_indexed 60 seconds beyond the configured reindex threshold.
        age = timedelta(seconds=app.config["SECURITY_SCANNER_V4_REINDEX_THRESHOLD"] + 60)
        ManifestSecurityStatus.create(
            manifest=manifest,
            repository=manifest.repository,
            error_json={},
            index_status=IndexStatus.FAILED,
            indexer_hash="abc",
            indexer_version=IndexerVersion.V4,
            last_indexed=datetime.utcnow() - age,
            metadata_json={},
        )

    secscan.perform_indexing()

    assert ManifestSecurityStatus.select().count() == Manifest.select().count()
    for status_row in ManifestSecurityStatus.select():
        assert status_row.index_status == IndexStatus.COMPLETED
def perform_indexing(self, start_token=None, batch_size=None):
    """
    Index manifests in the ID range from the start token's `min_id` (or the
    smallest manifest ID when no token is given) up to the largest manifest ID.

    Returns a ScanToken for the ID just past the largest one, or None when the
    indexer state cannot be fetched or there is no valid ID range to scan.
    """
    try:
        indexer_state = self._secscan_api.state()
    except APIRequestFailure:
        return None

    config = self.app.config
    if not batch_size:
        batch_size = config.get("SECURITY_SCANNER_V4_BATCH_SIZE", 0)

    threshold_seconds = config.get("SECURITY_SCANNER_V4_REINDEX_THRESHOLD", 86400)
    reindex_threshold = datetime.utcnow() - timedelta(seconds=threshold_seconds)

    max_id = Manifest.select(fn.Max(Manifest.id)).scalar()
    if start_token is not None:
        start_index = start_token.min_id
    else:
        start_index = Manifest.select(fn.Min(Manifest.id)).scalar()

    # No manifests at all, or the token already points past the last manifest.
    nothing_to_scan = max_id is None or start_index is None or start_index > max_id
    if nothing_to_scan:
        return None

    candidates = self._get_manifest_iterator(
        indexer_state,
        start_index,
        max_id,
        batch_size=batch_size,
        reindex_threshold=reindex_threshold,
    )
    self._index(candidates, reindex_threshold)

    return ScanToken(max_id + 1)
def test_manifest_iterator(
    initialized_db, set_secscan_config, index_status, indexer_state, seconds, expect_zero
):
    """
    Gives every manifest a single security status row with the parametrized
    status and age, then checks whether the manifest iterator yields anything.
    """
    secscan = V4SecurityScanner(app, instance_keys, storage)

    for manifest in Manifest.select():
        with db_transaction():
            # Replace any existing status row for this manifest.
            stale_rows = ManifestSecurityStatus.delete().where(
                ManifestSecurityStatus.manifest == manifest,
                ManifestSecurityStatus.repository == manifest.repository,
            )
            stale_rows.execute()

            ManifestSecurityStatus.create(
                manifest=manifest,
                repository=manifest.repository,
                error_json={},
                index_status=index_status,
                indexer_hash="old hash",
                indexer_version=IndexerVersion.V4,
                last_indexed=datetime.utcnow() - timedelta(seconds=seconds),
                metadata_json={},
            )

    lowest_id = Manifest.select(fn.Min(Manifest.id)).scalar()
    highest_id = Manifest.select(fn.Max(Manifest.id)).scalar()
    iterator = secscan._get_manifest_iterator(indexer_state, lowest_id, highest_id)

    # Each yielded item is unpacked as a 3-tuple; only the number of items matters.
    yielded = sum(1 for _candidate, _abt, _num_remaining in iterator)

    assert (yielded == 0) == bool(expect_zero)
def test_perform_indexing_needs_reindexing_within_reindex_threshold(
    initialized_db, set_secscan_config
):
    """
    With a 300 second reindex threshold, COMPLETED status rows created just now
    must keep their existing indexer hash even though the indexer reports a
    different state.
    """
    app.config["SECURITY_SCANNER_V4_REINDEX_THRESHOLD"] = 300

    secscan = V4SecurityScanner(app, instance_keys, storage)
    api = secscan._secscan_api = mock.Mock()
    api.state.return_value = {"state": "xyz"}
    api.index.return_value = (
        {"err": None, "state": IndexReportState.Index_Finished},
        "xyz",
    )

    # last_indexed is not passed here, so the model's own default applies.
    for manifest in Manifest.select():
        ManifestSecurityStatus.create(
            manifest=manifest,
            repository=manifest.repository,
            error_json={},
            index_status=IndexStatus.COMPLETED,
            indexer_hash="abc",
            indexer_version=IndexerVersion.V4,
            metadata_json={},
        )

    secscan.perform_indexing()

    assert ManifestSecurityStatus.select().count() == Manifest.select().count()
    for status_row in ManifestSecurityStatus.select():
        assert status_row.indexer_hash == "abc"
def test_perform_indexing_api_request_failure_index(initialized_db, set_secscan_config):
    """
    When the index API call raises APIRequestFailure, indexing returns no token
    and writes no status rows; once the API succeeds again, a second run returns
    a token just past the largest manifest ID and writes status rows.
    """
    secscan = V4SecurityScanner(app, instance_keys, storage)
    api = secscan._secscan_api = mock.Mock()
    api.state.return_value = {"state": "abc"}
    api.index.side_effect = APIRequestFailure()

    failed_token = secscan.perform_indexing()

    assert failed_token is None
    assert ManifestSecurityStatus.select().count() == 0

    # Make the scanner API return good results and attempt indexing again.
    api.index.side_effect = None
    api.index.return_value = (
        {"err": None, "state": IndexReportState.Index_Finished},
        "abc",
    )

    retry_token = secscan.perform_indexing()

    highest_id = Manifest.select(fn.Max(Manifest.id)).scalar()
    assert retry_token.min_id == highest_id + 1

    # NOTE(review): count() is taken on a MAX() select here; presumably this is
    # meant to equal the number of manifests -- confirm against the ORM version.
    expected_rows = Manifest.select(fn.Max(Manifest.id)).count()
    assert ManifestSecurityStatus.select().count() == expected_rows
def test_perform_indexing_whitelist(initialized_db, set_secscan_config):
    """
    With the namespace whitelist set to ["devtable"], only manifests in
    repositories owned by "devtable" are sent to the indexer and given status rows.
    """
    app.config["SECURITY_SCANNER_V4_NAMESPACE_WHITELIST"] = ["devtable"]
    expected_manifests = (
        Manifest.select().join(Repository).join(User).where(User.username == "devtable")
    )

    secscan = V4SecurityScanner(app, instance_keys, storage)
    api = secscan._secscan_api = mock.Mock()
    api.state.return_value = {"state": "abc"}
    api.index.return_value = (
        {"err": None, "state": IndexReportState.Index_Finished},
        "abc",
    )

    next_token = secscan.perform_indexing()

    # One index call per whitelisted manifest.
    assert secscan._secscan_api.index.call_count == expected_manifests.count()

    for status_row in ManifestSecurityStatus.select():
        assert status_row.repository.namespace_user.username == "devtable"

    assert ManifestSecurityStatus.select().count() == expected_manifests.count()

    # The manifest just before the returned token belongs to the whitelisted namespace.
    last_scanned = Manifest.get_by_id(next_token.min_id - 1)
    assert last_scanned.repository.namespace_user.username == "devtable"
def delete_manifests():
    """
    Deletes every row from the manifest-related tables, in a fixed order, and
    returns the string "OK".
    """
    # NOTE(review): Manifest is cleared before TagManifestToManifest; if the
    # latter holds a foreign key to Manifest this order could fail on databases
    # that enforce foreign keys -- confirm.
    tables_in_order = (
        ManifestLegacyImage,
        ManifestBlob,
        Manifest,
        TagManifestToManifest,
        TagManifest,
    )
    for table in tables_in_order:
        table.delete().execute()

    return "OK"
def _backfill_manifests(self):
    """
    Fills in `layers_compressed_size` and `config_media_type` for manifest rows
    whose `layers_compressed_size` is still NULL.

    Returns False when no such row exists, True after a pass over the
    candidate rows.
    """
    # Probe for at least one row that still needs backfilling.
    try:
        Manifest.select().where(Manifest.layers_compressed_size >> None).get()
    except Manifest.DoesNotExist:
        logger.debug("Manifest backfill worker has completed; skipping")
        return False

    def _unsized_manifests():
        return Manifest.select().where(Manifest.layers_compressed_size >> None)

    highest_id = Manifest.select(fn.Max(Manifest.id)).scalar()
    candidates = yield_random_entries(_unsized_manifests, Manifest.id, 250, highest_id, 1)

    for manifest_row, abt, _ in candidates:
        if manifest_row.layers_compressed_size is not None:
            logger.debug("Another worker preempted this worker")
            abt.set()
            continue

        logger.debug("Setting layers compressed size for manifest %s", manifest_row.id)

        # -1 is what gets stored when the manifest cannot be parsed.
        layers_compressed_size = -1
        config_media_type = None
        manifest_bytes = Bytes.for_string_or_unicode(manifest_row.manifest_bytes)

        try:
            parsed = parse_manifest_from_bytes(
                manifest_bytes, manifest_row.media_type.name, validate=False
            )
            layers_compressed_size = parsed.layers_compressed_size
            if layers_compressed_size is None:
                layers_compressed_size = 0

            config_media_type = parsed.config_media_type or None
        except ManifestException as me:
            logger.warning(
                "Got exception when trying to parse manifest %s: %s", manifest_row.id, me
            )

        assert layers_compressed_size is not None

        # Only write if the row is still unsized, so a concurrent writer wins.
        rows_changed = (
            Manifest.update(
                layers_compressed_size=layers_compressed_size,
                config_media_type=config_media_type,
            )
            .where(Manifest.id == manifest_row.id, Manifest.layers_compressed_size >> None)
            .execute()
        )

        if rows_changed != 1:
            logger.debug("Another worker preempted this worker")
            abt.set()

    return True
def clear_rows(initialized_db):
    """
    Removes all new-style rows so a backfill can be run from scratch.
    """
    # Tables are emptied in this exact order, ending with Manifest itself.
    tables_in_order = (
        TagToRepositoryTag,
        Tag,
        TagManifestLabelMap,
        ManifestLabel,
        ManifestBlob,
        ManifestLegacyImage,
        TagManifestToManifest,
        Manifest,
    )
    for table in tables_in_order:
        table.delete().execute()
def test_load_security_information_unknown_manifest(initialized_db):
    """
    Loading security information for a manifest whose database row has been
    deleted must report UNSUPPORTED_FOR_INDEXING.
    """
    repo_ref = registry_model.lookup_repository("devtable", "simple")
    latest_tag = registry_model.get_repo_tag(repo_ref, "latest")
    manifest = registry_model.get_manifest_for_tag(latest_tag)
    registry_model.populate_legacy_images_for_testing(manifest, storage)

    # Delete the manifest row, recursing into dependent rows.
    manifest_row = Manifest.get(id=manifest._db_id)
    manifest_row.delete_instance(recursive=True)

    scanner = V2SecurityScanner(app, instance_keys, storage)
    lookup = scanner.load_security_information(manifest)

    assert lookup.status == ScanLookupStatus.UNSUPPORTED_FOR_INDEXING
def test_perform_indexing_api_request_index_error_response(initialized_db, set_secscan_config):
    """
    When the indexer answers with an Index_Error report, indexing still returns
    a token and every status row written is marked FAILED.
    """
    secscan = V4SecurityScanner(app, instance_keys, storage)
    api = secscan._secscan_api = mock.Mock()
    api.state.return_value = {"state": "xyz"}
    api.index.return_value = (
        {"err": "something", "state": IndexReportState.Index_Error},
        "xyz",
    )

    next_token = secscan.perform_indexing()

    highest_id = Manifest.select(fn.Max(Manifest.id)).scalar()
    assert next_token.min_id == highest_id + 1

    expected_rows = Manifest.select(fn.Max(Manifest.id)).count()
    assert ManifestSecurityStatus.select().count() == expected_rows

    for status_row in ManifestSecurityStatus.select():
        assert status_row.index_status == IndexStatus.FAILED
def test_perform_indexing_manifest_list(initialized_db, set_secscan_config):
    """
    After every manifest row is switched to the manifest-list media type, an
    indexing run must mark each of them MANIFEST_UNSUPPORTED.
    """
    # These lookups are kept as-is; their results are not used further below.
    repo_ref = registry_model.lookup_repository("devtable", "simple")
    latest_tag = registry_model.get_repo_tag(repo_ref, "latest")
    manifest = registry_model.get_manifest_for_tag(latest_tag)

    # The update has no WHERE clause, so it applies to all manifest rows.
    manifest_list_type = MediaType.get(name=DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE)
    Manifest.update(media_type=manifest_list_type).execute()

    secscan = V4SecurityScanner(app, instance_keys, storage)
    secscan._secscan_api = mock.Mock()

    secscan.perform_indexing()

    assert ManifestSecurityStatus.select().count() == Manifest.select().count()
    for status_row in ManifestSecurityStatus.select():
        assert status_row.index_status == IndexStatus.MANIFEST_UNSUPPORTED
def test_list_manifest_labels(initialized_db):
    """
    Labels created on one manifest are listed for that manifest and are not
    listed for a different manifest.
    """
    manifest = Manifest.get()

    created_labels = (
        create_manifest_label(manifest, 'foo', '1', 'manifest'),
        create_manifest_label(manifest, 'bar', '2', 'api'),
        create_manifest_label(manifest, 'baz', '3', 'internal'),
    )

    # The listing is re-fetched for every membership check.
    for label in created_labels:
        assert label in list_manifest_labels(manifest)

    other_manifest = Manifest.select().where(Manifest.id != manifest.id).get()
    for label in created_labels:
        assert label not in list_manifest_labels(other_manifest)
def test_change_tag_expiration(expiration_offset, expected_offset, initialized_db):
    """
    Changes a tag's expiration and checks the resulting end timestamps.

    `expiration_offset` is the requested offset from now (None to clear the
    expiration); `expected_offset` is the offset from the tag's start that the
    stored expiration should have (None when no expiration is expected).
    """
    repository = create_repository('devtable', 'somenewrepo', None)
    image1 = find_create_or_link_image('foobarimage1', repository, None, {}, 'local_us')

    manifest = Manifest.get()
    footag = create_or_update_tag_for_repo(
        repository, 'foo', image1.docker_image_id, oci_manifest=manifest
    )

    expiration_date = None
    if expiration_offset is not None:
        expiration_date = datetime.utcnow() + convert_to_timedelta(expiration_offset)

    assert change_tag_expiration(footag, expiration_date)

    # Lookup the tag again.
    footag_updated = get_active_tag('devtable', 'somenewrepo', 'foo')
    oci_tag = _get_oci_tag(footag_updated)

    if expected_offset is None:
        assert footag_updated.lifetime_end_ts is None
        assert oci_tag.lifetime_end_ms is None
        return

    start_date = datetime.utcfromtimestamp(footag_updated.lifetime_start_ts)
    end_date = datetime.utcfromtimestamp(footag_updated.lifetime_end_ts)
    expected_end_date = start_date + convert_to_timedelta(expected_offset)

    # Allow a few seconds of variance in either direction. Comparing the signed
    # difference alone would accept an end date arbitrarily later than expected.
    assert abs((expected_end_date - end_date).total_seconds()) < 5
    assert oci_tag.lifetime_end_ms == (footag_updated.lifetime_end_ts * 1000)
def force_cache_repo_size(repo_id: int):
    """
    Recomputes the sum of `layers_compressed_size` over a repository's manifests
    and stores it in the repository's RepositorySize row, creating the row if
    it is missing.

    Returns the computed size, using 0 when the sum yields no value.
    """
    size_query = Manifest.select(
        fn.Sum(Manifest.layers_compressed_size).alias("size_bytes")
    ).where(Manifest.repository == repo_id)

    try:
        size = size_query.scalar()
    except Manifest.DoesNotExist:
        size = 0

    if size is None:
        size = 0

    with db_transaction():
        existing = get_repository_size(repo_id)
        try:
            if existing is None:
                RepositorySize.create(repository_id=repo_id, size_bytes=size)
            else:
                RepositorySize.update(size_bytes=size).where(
                    RepositorySize.repository_id == repo_id
                ).execute()
        except IntegrityError:
            # It is possible that this gets preempted by another worker.
            # If that's the case, it should be safe to just ignore the IntegrityError,
            # as the RepositorySize should have been created with the correct value.
            logger.warning("RepositorySize for repo id %s already exists", repo_id)
            return size

    return size
def test_perform_indexing_failed_within_reindex_threshold(initialized_db, set_secscan_config):
    """
    With a 300 second reindex threshold, FAILED status rows created just now
    for the "devtable" manifests must still be FAILED after an indexing run.
    """
    app.config["SECURITY_SCANNER_V4_REINDEX_THRESHOLD"] = 300
    expected_manifests = (
        Manifest.select().join(Repository).join(User).where(User.username == "devtable")
    )

    secscan = V4SecurityScanner(app, instance_keys, storage)
    api = secscan._secscan_api = mock.Mock()
    api.state.return_value = {"state": "abc"}
    api.index.return_value = (
        {"err": None, "state": IndexReportState.Index_Finished},
        "abc",
    )

    # last_indexed is not passed here, so the model's own default applies.
    for manifest in expected_manifests:
        ManifestSecurityStatus.create(
            manifest=manifest,
            repository=manifest.repository,
            error_json={},
            index_status=IndexStatus.FAILED,
            indexer_hash="abc",
            indexer_version=IndexerVersion.V4,
            metadata_json={},
        )

    secscan.perform_indexing()

    assert ManifestSecurityStatus.select().count() == expected_manifests.count()
    for status_row in ManifestSecurityStatus.select():
        assert status_row.index_status == IndexStatus.FAILED
def perform_indexing_recent_manifests(self, batch_size=None):
    """
    Index the manifests with the highest IDs.

    The scanned ID window ends at the largest manifest ID and starts
    `batch_size` IDs earlier, but never below 1. When `batch_size` is not
    given (or is falsy) it is read from the SECURITY_SCANNER_V4_BATCH_SIZE
    config entry. The iterator itself is asked for batches of one twentieth of
    that size, with a minimum of 1.

    Always returns None. Nothing is indexed when the indexer state cannot be
    fetched or when there are no manifests at all.
    """
    try:
        indexer_state = self._secscan_api.state()
    except APIRequestFailure:
        return None

    if not batch_size:
        batch_size = self.app.config.get("SECURITY_SCANNER_V4_BATCH_SIZE", 0)

    reindex_threshold = datetime.utcnow() - timedelta(
        seconds=self.app.config.get("SECURITY_SCANNER_V4_REINDEX_THRESHOLD", 86400)
    )

    end_index = Manifest.select(fn.Max(Manifest.id)).scalar()
    if end_index is None:
        # No manifest rows exist, so there is no maximum ID; without this guard
        # the subtraction below would raise a TypeError.
        return None

    start_index = max(end_index - batch_size, 1)

    iterator = self._get_manifest_iterator(
        indexer_state,
        start_index,
        end_index,
        batch_size=max(batch_size // 20, 1),
        reindex_threshold=reindex_threshold,
    )

    self._index(iterator, reindex_threshold)
def cache_namespace_repository_sizes(namespace_name):
    """
    Inserts RepositorySize rows for repositories in the given namespace that
    have at least one non-hidden, unexpired tag and no RepositorySize row yet.
    The stored size is the sum of `layers_compressed_size` over the
    repository's manifests.
    """
    namespace = user.get_user_or_org(namespace_name)
    now_ms = get_epoch_timestamp_ms()

    # Repositories with at least one visible tag that has not reached its end time.
    # The comparison to False builds a query expression, so `==` is required.
    tag_is_visible = Tag.hidden == False  # noqa: E712
    tag_is_alive = (Tag.lifetime_end_ms >> None) | (Tag.lifetime_end_ms > now_ms)
    subquery = (
        Tag.select(Tag.repository_id)
        .where(tag_is_visible)
        .where(tag_is_alive)
        .group_by(Tag.repository_id)
        .having(fn.Count(Tag.name) > 0)
    )

    # Per-repository size totals, restricted to this namespace.
    namespace_repo_sizes = (
        Manifest.select(
            (Repository.id).alias("repository_id"),
            (Repository.name).alias("repository_name"),
            fn.sum(Manifest.layers_compressed_size).alias("repository_size"),
        )
        .join(Repository)
        .join(subquery, on=(subquery.c.repository_id == Repository.id))
        .where(Repository.namespace_user == namespace.id)
        .group_by(Repository.id)
    )

    # Keep only repositories lacking a RepositorySize row (left join, NULL match).
    insert_query = (
        namespace_repo_sizes.select(Repository.id, fn.sum(Manifest.layers_compressed_size))
        .join_from(Repository, RepositorySize, JOIN.LEFT_OUTER)
        .where(RepositorySize.repository_id.is_null())
    )

    target_fields = [RepositorySize.repository_id, RepositorySize.size_bytes]
    RepositorySize.insert_from(insert_query, fields=target_fields).execute()
def create_manifest(
    repository_id: int,
    manifest: ManifestInterface | ManifestListInterface,
    raise_on_error: bool = True,
) -> Manifest | None:
    """
    Creates a manifest in the database.

    Does not handle sub manifests in a manifest list/index.

    Returns the created Manifest row. When the insert hits an IntegrityError
    and `raise_on_error` is False, returns None instead.

    Raises a _ManifestAlreadyExists exception if the insert hits an
    IntegrityError and `raise_on_error` is True (the default).
    """
    media_type = Manifest.media_type.get_id(manifest.media_type)

    try:
        return Manifest.create(
            repository=repository_id,
            digest=manifest.digest,
            media_type=media_type,
            manifest_bytes=manifest.bytes.as_encoded_str(),
            config_media_type=manifest.config_media_type,
            layers_compressed_size=manifest.layers_compressed_size,
        )
    except IntegrityError as e:
        # NOTE: An IntegrityError means (barring a bug) that the manifest was created by
        # another caller while we were attempting to create it. Since we need to return
        # the manifest, we raise a specialized exception here to break out of the
        # transaction so we can retrieve it.
        if raise_on_error:
            raise _ManifestAlreadyExists(e) from e

    # Reached only when the IntegrityError was deliberately suppressed.
    return None
def test_create_manifest_label(key, value, source_type, expected_error, initialized_db):
    """
    Creating a manifest label either raises DataModelException matching
    `expected_error`, or yields a label that can be listed (optionally by key
    prefix) and fetched by UUID within the stated query counts.
    """
    manifest = Manifest.get()

    if expected_error:
        with pytest.raises(DataModelException) as ex:
            create_manifest_label(manifest, key, value, source_type)

        assert ex.match(expected_error)
        return

    label = create_manifest_label(manifest, key, value, source_type)

    linked_rows = ManifestLabel.select().where(ManifestLabel.manifest == manifest)
    linked_label_ids = [row.label_id for row in linked_rows]
    assert label.id in linked_label_ids

    with assert_query_count(1):
        assert label in list_manifest_labels(manifest)

    # Prefix filtering: a non-matching prefix hides the label, a matching one keeps it.
    assert label not in list_manifest_labels(manifest, 'someprefix')
    assert label in list_manifest_labels(manifest, key[0:2])

    with assert_query_count(1):
        assert get_manifest_label(label.uuid, manifest) == label
def _lookup_manifest(repository_id, manifest_digest, allow_dead=False):
    """
    Finds the manifest with the given digest in the given repository.

    With `allow_dead` set, any matching row is returned. Otherwise the manifest
    must be referenced by an alive tag, either directly or as the child of a
    manifest that has an alive tag. Returns None when nothing matches.
    """
    query = (
        Manifest.select()
        .where(Manifest.repository == repository_id)
        .where(Manifest.digest == manifest_digest)
    )

    if not allow_dead:
        # First attempt: the manifest itself is referenced by an alive tag.
        try:
            return filter_to_alive_tags(query.join(Tag)).get()
        except Manifest.DoesNotExist:
            pass

        # Second attempt: it is the child of a manifest that has an alive tag.
        via_parent = query.join(
            ManifestChild, on=(ManifestChild.child_manifest == Manifest.id)
        ).join(Tag, on=(Tag.manifest == ManifestChild.manifest))
        query = filter_to_alive_tags(via_parent)

    try:
        return query.get()
    except Manifest.DoesNotExist:
        return None
def needs_reindexing_query(indexer_hash):
    """
    Builds a query for manifests whose security status is not
    MANIFEST_UNSUPPORTED, was written with a different indexer hash, and was
    last indexed before the value returned by `reindex_threshold()`.
    """
    with_status = Manifest.select().join(ManifestSecurityStatus)

    is_supported = ManifestSecurityStatus.index_status != IndexStatus.MANIFEST_UNSUPPORTED
    hash_differs = ManifestSecurityStatus.indexer_hash != indexer_hash
    is_stale = ManifestSecurityStatus.last_indexed < reindex_threshold()

    # Multiple arguments to where() are combined, so all three must hold.
    return with_status.where(is_supported, hash_differs, is_stale)
def test_perform_indexing_api_request_failure_index(initialized_db):
    """
    With the namespace whitelist set to ["devtable"]: a failing index API call
    yields no token and no status rows; after the API recovers, a second run
    returns a token just past the largest "devtable" manifest ID.
    """
    app.config["SECURITY_SCANNER_V4_NAMESPACE_WHITELIST"] = ["devtable"]
    expected_manifests = (
        Manifest.select(fn.Max(Manifest.id))
        .join(Repository)
        .join(User)
        .where(User.username == "devtable")
    )

    secscan = V4SecurityScanner(app, instance_keys, storage)
    api = secscan._secscan_api = mock.Mock()
    api.state.return_value = "abc"
    api.index.side_effect = APIRequestFailure()

    failed_token = secscan.perform_indexing()

    assert failed_token is None
    assert ManifestSecurityStatus.select().count() == 0

    # Make the scanner API return good results and attempt indexing again.
    api.index.side_effect = None
    api.index.return_value = (
        {"err": None, "state": IndexReportState.Index_Finished},
        "abc",
    )

    retry_token = secscan.perform_indexing()

    assert retry_token.min_id == expected_manifests.scalar() + 1
    assert ManifestSecurityStatus.select().count() == expected_manifests.count()
def index_error_query():
    """
    Builds a query over the manifests returned by `eligible_manifests`, joined
    to their security status and restricted to rows whose index status is FAILED.
    """
    # Reset the join context to Manifest before joining the status table.
    candidates = eligible_manifests(Manifest.select()).switch(Manifest)
    with_status = candidates.join(ManifestSecurityStatus)
    return with_status.where(ManifestSecurityStatus.index_status == IndexStatus.FAILED)
def needs_reindexing_query(indexer_hash):
    """
    Builds a query over the manifests returned by `eligible_manifests`, joined
    to their security status and restricted to rows whose indexer hash differs
    from `indexer_hash`.
    """
    # Reset the join context to Manifest before joining the status table.
    candidates = eligible_manifests(Manifest.select()).switch(Manifest)
    with_status = candidates.join(ManifestSecurityStatus)
    return with_status.where(ManifestSecurityStatus.indexer_hash != indexer_hash)
def test_perform_indexing_needs_reindexing(initialized_db):
    """
    With the namespace whitelist set to ["devtable"], COMPLETED status rows
    carrying an old indexer hash must carry the new hash after an indexing run.
    """
    app.config["SECURITY_SCANNER_V4_NAMESPACE_WHITELIST"] = ["devtable"]
    expected_manifests = (
        Manifest.select().join(Repository).join(User).where(User.username == "devtable")
    )

    secscan = V4SecurityScanner(app, instance_keys, storage)
    api = secscan._secscan_api = mock.Mock()
    api.state.return_value = "xyz"
    api.index.return_value = (
        {"err": None, "state": IndexReportState.Index_Finished},
        "xyz",
    )

    # Seed each whitelisted manifest with a status written under the old hash.
    for manifest in expected_manifests:
        ManifestSecurityStatus.create(
            manifest=manifest,
            repository=manifest.repository,
            error_json={},
            index_status=IndexStatus.COMPLETED,
            indexer_hash="abc",
            indexer_version=IndexerVersion.V4,
            metadata_json={},
        )

    secscan.perform_indexing()

    assert ManifestSecurityStatus.select().count() == expected_manifests.count()
    for status_row in ManifestSecurityStatus.select():
        assert status_row.indexer_hash == "xyz"
def not_indexed_query():
    """
    Builds a query over the manifests returned by `eligible_manifests` that
    have no security status row.
    """
    # Reset the join context to Manifest before joining the status table.
    candidates = eligible_manifests(Manifest.select()).switch(Manifest)

    # Left outer join plus a NULL status ID keeps only manifests without a status.
    with_optional_status = candidates.join(ManifestSecurityStatus, JOIN.LEFT_OUTER)
    return with_optional_status.where(ManifestSecurityStatus.id >> None)