Пример #1
0
def test_images_shared_cas(default_tag_policy, initialized_db):
    """
    Two tags in one repository point at two images whose image storage rows are *distinct
    records* that share the same *CAS path*.

    Deleting the first tag should delete the first image and its storage row, but not the file
    in storage, because the second storage row still shares that CAS path.
    """
    with assert_gc_integrity(expect_storage_removed=True):
        repository = create_repository()

        # Create two image storage records with the same content checksum.
        # The content is bytes because hashlib.sha256() rejects str on Python 3.
        content = b"hello world"
        digest = "sha256:" + hashlib.sha256(content).hexdigest()
        preferred = storage.preferred_locations[0]
        storage.put_content({preferred}, storage.blob_path(digest), content)

        is1 = database.ImageStorage.create(content_checksum=digest, uploading=False)
        is2 = database.ImageStorage.create(content_checksum=digest, uploading=False)

        location = database.ImageStorageLocation.get(name=preferred)

        database.ImageStoragePlacement.create(location=location, storage=is1)
        database.ImageStoragePlacement.create(location=location, storage=is2)

        # Ensure the CAS path exists.
        assert storage.exists({preferred}, storage.blob_path(digest))

        # Create two images in the repository, and two tags, each pointing to one of the storages.
        first_image = Image.create(
            docker_image_id="i1", repository=repository, storage=is1, ancestors="/"
        )

        second_image = Image.create(
            docker_image_id="i2", repository=repository, storage=is2, ancestors="/"
        )

        store_tag_manifest(
            repository.namespace_user.username,
            repository.name,
            "first",
            first_image.docker_image_id,
        )

        store_tag_manifest(
            repository.namespace_user.username,
            repository.name,
            "second",
            second_image.docker_image_id,
        )

        assert_not_deleted(repository, "i1", "i2")

        # Delete the first tag; only the first image may be collected.
        delete_tag(repository, "first")
        assert_deleted(repository, "i1")
        assert_not_deleted(repository, "i2")

        # Ensure the CAS path still exists.
        assert storage.exists({preferred}, storage.blob_path(digest))
Пример #2
0
def test_image_with_cas(default_tag_policy, initialized_db):
    """
    Check that deleting the only tag of a CAS-backed image removes the CAS data.

    A blob is written under its CAS path and referenced from a manifest tagged ``first``. After
    the temp links and the tag are deleted, both the image and the file in storage must be gone.
    """
    with assert_gc_integrity(expect_storage_removed=True):
        repo = create_repository()

        # Write the blob into storage under its CAS path.
        payload = b"hello world"
        checksum = "sha256:" + hashlib.sha256(payload).hexdigest()
        location_name = storage.preferred_locations[0]
        storage.put_content({location_name}, storage.blob_path(checksum), payload)

        # Record the storage row and its placement.
        blob_row = database.ImageStorage.create(content_checksum=checksum)
        location_row = database.ImageStorageLocation.get(name=location_name)
        database.ImageStoragePlacement.create(location=location_row, storage=blob_row)

        # Temp link so the blob is available.
        model.blob.store_blob_record_and_temp_link_in_repo(
            repo, checksum, location_row, len(payload), 120
        )

        # The CAS path must exist before anything is deleted.
        assert storage.exists({location_name}, storage.blob_path(checksum))

        # Build a manifest whose single layer is that blob.
        manifest_builder = DockerSchema1ManifestBuilder(
            repo.namespace_user.username, repo.name, "first"
        )
        layer_metadata = json.dumps({"id": "i1"})
        manifest_builder.insert_layer(checksum, layer_metadata)
        signed_manifest = manifest_builder.build(docker_v2_signing_key)

        # Store the manifest and point the `first` tag at it.
        registry_model.create_manifest_and_retarget_tag(
            RepositoryReference.for_repo_obj(repo),
            signed_manifest,
            "first",
            storage,
            raise_on_error=True,
        )

        # Drop the temp reference, then the tag.
        _delete_temp_links(repo)
        delete_tag(repo, "first")

        assert_deleted(repo, "i1")

        # The CAS path must be gone now.
        assert not storage.exists({location_name}, storage.blob_path(checksum))
Пример #3
0
def test_purge_repository_storage_blob(default_tag_policy, initialized_db):
    """
    Purge every repository and check that blobs referenced only by the purged repository are
    removed from storage.
    """
    with populate_storage_for_gc():
        expected_blobs_removed_from_storage = set()
        preferred = storage.preferred_locations[0]

        # Check that every existing uploaded blob has an object in storage.
        for repo in database.Repository.select().order_by(database.Repository.id):
            for uploadedblob in UploadedBlob.select().where(UploadedBlob.repository == repo):
                assert storage.exists(
                    {preferred}, storage.blob_path(uploadedblob.blob.content_checksum)
                )

        # Remove everything.
        for repo in database.Repository.select():
            for uploadedblob in UploadedBlob.select().where(UploadedBlob.repository == repo):
                # Check if only this repository is referencing the uploaded blob's storage.
                # If so, the blob should be removed from storage.
                has_dependent_manifestblob = (
                    ManifestBlob.select()
                    .where(
                        ManifestBlob.blob == uploadedblob.blob,
                        ManifestBlob.repository != repo,
                    )
                    .count()
                )
                has_dependent_image = (
                    Image.select()
                    .where(
                        Image.storage == uploadedblob.blob,
                        Image.repository != repo,
                    )
                    .count()
                )
                # Compare on the blob column: comparing the model class itself to a row is
                # not a field expression and would never match uploads in other repositories.
                has_dependent_uploadedblobs = (
                    UploadedBlob.select()
                    .where(
                        UploadedBlob.blob == uploadedblob.blob,
                        UploadedBlob.repository != repo,
                    )
                    .count()
                )

                if (
                    not has_dependent_manifestblob
                    and not has_dependent_image
                    and not has_dependent_uploadedblobs
                ):
                    expected_blobs_removed_from_storage.add(uploadedblob.blob)

            assert model.gc.purge_repository(repo, force=True)

        # Every blob that had no other referrer must be gone from storage.
        for removed_blob_from_storage in expected_blobs_removed_from_storage:
            assert not storage.exists(
                {preferred}, storage.blob_path(removed_blob_from_storage.content_checksum)
            )
Пример #4
0
def test_images_shared_cas_with_new_blob_table(default_tag_policy,
                                               initialized_db):
    """
    A repository with a tag and image whose storage shares its CAS path with a row in the
    ApprBlob table.

    Deleting the tag should delete the image and its storage row, but not the file in storage,
    as its CAS path is still referenced by the blob row.
    """
    with assert_gc_integrity(expect_storage_removed=True):
        repository = create_repository()

        # Create one image storage record and one ApprBlob row with the same content checksum.
        # The content is bytes because hashlib.sha256() rejects str on Python 3.
        content = b"hello world"
        digest = "sha256:" + hashlib.sha256(content).hexdigest()
        preferred = storage.preferred_locations[0]
        storage.put_content({preferred}, storage.blob_path(digest), content)

        media_type = database.MediaType.get(name="text/plain")

        is1 = database.ImageStorage.create(content_checksum=digest,
                                           uploading=False)
        database.ApprBlob.create(digest=digest, size=0, media_type=media_type)

        location = database.ImageStorageLocation.get(name=preferred)
        database.ImageStoragePlacement.create(location=location, storage=is1)

        # Ensure the CAS path exists.
        assert storage.exists({preferred}, storage.blob_path(digest))

        # Create the image in the repository, and the tag.
        first_image = Image.create(docker_image_id="i1",
                                   repository=repository,
                                   storage=is1,
                                   ancestors="/")

        store_tag_manifest(
            repository.namespace_user.username,
            repository.name,
            "first",
            first_image.docker_image_id,
        )

        assert_not_deleted(repository, "i1")

        # Delete the tag.
        delete_tag(repository, "first")
        assert_deleted(repository, "i1")

        # Ensure the CAS path still exists, as it is referenced by the ApprBlob row.
        assert storage.exists({preferred}, storage.blob_path(digest))
Пример #5
0
def exportedlogs(file_id):
    """
    Stream the exported logs file named ``file_id`` back as JSON.

    Aborts with 404 when no entry of ``DISTRIBUTED_STORAGE_CONFIG`` has the storage type
    ``LocalStorage`` or when the file is absent from the preferred storage locations, and
    with 403 when reading the file raises ``IOError``.
    """
    # Only enable this endpoint if local storage is available.
    storage_config = app.config.get("DISTRIBUTED_STORAGE_CONFIG", {})
    if not any(engine == "LocalStorage" for engine, _ in storage_config.values()):
        abort(404)

    logs_dir = app.config.get("EXPORT_ACTION_LOGS_STORAGE_PATH", "exportedactionlogs")
    logs_path = os.path.join(logs_dir, file_id)
    if not storage.exists(storage.preferred_locations, logs_path):
        abort(404)

    try:
        log_stream = storage.stream_read_file(storage.preferred_locations, logs_path)
        return send_file(log_stream, mimetype="application/json")
    except IOError:
        logger.exception("Could not read exported logs")
        abort(403)
Пример #6
0
    def test_store_blob_on_first_time_download(self, proxy_manifest_response):
        """
        A successful first download of a blob must leave the blob present in storage.
        """
        upstream = proxy_manifest_response(
            self.tag,
            HELLO_WORLD_SCHEMA2_MANIFEST_JSON,
            DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
        )
        request_params = {"repository": self.repository, "digest": self.digest}

        # Patch the proxy and the model cache for the duration of the request.
        with patch(
            "data.registry_model.registry_proxy_model.Proxy",
            MagicMock(return_value=upstream),
        ), patch(
            "endpoints.v2.blob.model_cache",
            NoopDataModelCache(TEST_CACHE_CONFIG),
        ):
            conduct_call(
                self.client,
                "v2.download_blob",
                url_for,
                "GET",
                request_params,
                expected_code=200,
                headers=self.headers,
            )

        layer_path = get_layer_path(self.blob)
        assert layer_path is not None

        # Look the blob up in the location recorded by its placement row.
        blob_placements = ImageStoragePlacement.filter(
            ImageStoragePlacement.storage == self.blob
        )
        location_name = blob_placements.get().location.name
        assert storage.exists(
            [location_name], layer_path
        ), f"blob not found in storage at path {layer_path}"
Пример #7
0
def test_logrotateworker_with_cutoff(logs_model):
    """
    Archive logs up to a cutoff date and verify they leave the model and land in storage.

    Sixty daily log entries are created, the last entry of the first half supplies the cutoff,
    and the worker's archiving step is run against that cutoff.
    """
    days = 60
    start_timestamp = datetime(2019, 1, 1)
    one_year = timedelta(days=365)

    # Make sure there are no existing logs
    assert not _lookup_logs(logs_model, start_timestamp - one_year, start_timestamp + one_year)

    # Create one log entry per day.
    for day_offset in range(days):
        logs_model.log_action(
            "push_repo",
            namespace_name="devtable",
            repository_name="simple",
            ip="1.2.3.4",
            timestamp=start_timestamp + timedelta(days=day_offset),
        )

    # Fetch everything that was just written.
    all_logs = _lookup_logs(
        logs_model,
        start_timestamp - timedelta(days=days - 1),
        start_timestamp + timedelta(days=days + 1),
    )
    assert len(all_logs) == days

    # The cutoff is the datetime of the last entry in the first half of the results.
    half = len(all_logs) // 2
    first_half = all_logs[:half]
    assert first_half
    assert len(first_half) < len(all_logs)

    worker = LogRotateWorker()
    cutoff_date = first_half[-1].datetime

    # Archive the indices at or older than the cutoff date
    archived_files = worker._perform_archiving(cutoff_date)

    # Nothing before the cutoff may remain in the model.
    assert not _lookup_logs(logs_model, start_timestamp, cutoff_date - timedelta(seconds=1))

    # Every archived file must have been written to storage.
    for archived_file in archived_files:
        archive_path = os.path.join(SAVE_PATH, archived_file)
        assert storage.exists([SAVE_LOCATION], archive_path)

    # For the document-based model, the archived indices must also have been deleted.
    if isinstance(logs_model, DocumentLogsModel):
        assert len(logs_model.list_indices()) == days - half
        for index_name in logs_model.list_indices():
            index_date = datetime.strptime(
                index_name[len(INDEX_NAME_PREFIX):], INDEX_DATE_FORMAT
            )
            assert index_date >= cutoff_date
Пример #8
0
def test_image_with_cas(default_tag_policy, initialized_db):
    """
    A repository with a tag pointing to an image backed by CAS.

    Deleting and GCing the tag should result in the storage and its CAS data being removed.
    """
    with assert_gc_integrity(expect_storage_removed=True):
        repository = create_repository()

        # Create an image storage record under CAS.
        # The content is bytes because hashlib.sha256() rejects str on Python 3.
        content = b"hello world"
        digest = "sha256:" + hashlib.sha256(content).hexdigest()
        preferred = storage.preferred_locations[0]
        storage.put_content({preferred}, storage.blob_path(digest), content)

        image_storage = database.ImageStorage.create(content_checksum=digest,
                                                     uploading=False)
        location = database.ImageStorageLocation.get(name=preferred)
        database.ImageStoragePlacement.create(location=location,
                                              storage=image_storage)

        # Ensure the CAS path exists.
        assert storage.exists({preferred}, storage.blob_path(digest))

        # Create the image and the tag.
        first_image = Image.create(docker_image_id="i1",
                                   repository=repository,
                                   storage=image_storage,
                                   ancestors="/")

        store_tag_manifest(
            repository.namespace_user.username,
            repository.name,
            "first",
            first_image.docker_image_id,
        )

        assert_not_deleted(repository, "i1")

        # Delete the tag.
        delete_tag(repository, "first")
        assert_deleted(repository, "i1")

        # Ensure the CAS path is gone.
        assert not storage.exists({preferred}, storage.blob_path(digest))
Пример #9
0
def populate_storage_for_gc():
    """
    Write placeholder content into storage for every ImageStorage row, then yield.

    Each row's blob path is derived from its content checksum and written to the first
    preferred storage location; the write is checked before moving to the next row.
    """
    location = storage.preferred_locations[0]
    placeholder = b"hello world"
    for row in ImageStorage.select():
        storage.put_content({location}, storage.blob_path(row.content_checksum), placeholder)
        assert storage.exists({location}, storage.blob_path(row.content_checksum))

    yield
Пример #10
0
    def process_queue_item(self, job_details):
        """
        Remove the storage chunk described by a cleanup queue item.

        ``job_details`` must provide the keys ``location`` and ``path``. Nothing is removed when
        the path no longer exists at that location. An ``IOError`` raised during removal is
        turned into a ``JobException``.
        """
        logger.debug('Got chunk cleanup queue item: %s', job_details)
        chunk_location, chunk_path = job_details['location'], job_details['path']

        if storage.exists([chunk_location], chunk_path):
            try:
                storage.remove([chunk_location], chunk_path)
            except IOError:
                raise JobException()
        else:
            logger.debug('Chunk already deleted')
Пример #11
0
def find_broken_storages():
    """
    Return the ids of storages that have a placement whose layer path is missing.

    Every placement row is checked against its own location; a storage id appears at most
    once in the returned list.
    """
    print("Checking storages...")
    placement_count = ImageStoragePlacement.select().count()
    placement_query = ImageStoragePlacement.select().join(ImageStorage)
    placement_query = placement_query.switch(ImageStoragePlacement).join(ImageStorageLocation)

    missing_ids = set()
    for entry in tqdm(placement_query, total=placement_count):
        layer_path = model.storage.get_layer_path(entry.storage)
        if storage_system.exists([entry.location.name], layer_path):
            continue
        missing_ids.add(entry.storage.id)

    return list(missing_ids)
Пример #12
0
def test_garbage_collect_storage(default_tag_policy, initialized_db):
    """
    Running storage GC over a random sample of uploaded blobs must leave their files in storage.
    """
    with populate_storage_for_gc():
        location = storage.preferred_locations[0]

        # Pick a random, non-empty sample of the uploaded blobs.
        all_uploaded = list(UploadedBlob.select())
        sample_size = random.randrange(1, len(all_uploaded) + 1)
        sampled = random.sample(all_uploaded, sample_size)

        blob_ids = [entry.blob.id for entry in sampled]
        model.storage.garbage_collect_storage(blob_ids)

        # The sampled blobs must still be present in storage afterwards.
        for entry in sampled:
            checksum = entry.blob.content_checksum
            assert storage.exists({location}, storage.blob_path(checksum))
Пример #13
0
def verify_placements():
    """
    Check each storage's recorded locations and delete placement rows whose file is missing.

    Storages are drawn from ``yield_random_entries``; a storage id that has already been seen
    is skipped. For every location listed on a storage, the layer path is looked up in that
    location, and the matching placement row is deleted when the path does not exist.
    """
    seen_ids = set()

    def candidate_query():
        # `== False` builds the query expression passed to where(); do not rewrite as `not`.
        return ImageStorage.select().where(ImageStorage.uploading == False)  # noqa: E712

    max_storage_id = ImageStorage.select(fn.Max(ImageStorage.id)).scalar()
    entries = yield_random_entries(candidate_query, ImageStorage.id, 1000, max_storage_id, 1)

    for candidate, _abort, _ in entries:
        if candidate.id in seen_ids:
            continue
        seen_ids.add(candidate.id)

        logger.info("Checking placements for storage `%s`", candidate.uuid)
        try:
            located = model.storage.get_storage_by_uuid(candidate.uuid)
        except model.InvalidImageException:
            logger.exception("Could not find storage `%s`", candidate.uuid)
            continue

        layer_path = model.storage.get_layer_path(candidate)
        pending_locations = set(located.locations)
        if not pending_locations:
            continue

        logger.info(
            "Checking locations `%s` for storage `%s`", pending_locations, candidate.uuid
        )
        for location_name in pending_locations:
            logger.info(
                "Checking location `%s` for storage `%s`", location_name, candidate.uuid
            )
            if storage.exists([location_name], layer_path):
                continue

            # The file is gone from this location, so drop the placement row for it.
            location_row = _get_location_row(location_name)
            logger.info(
                "Location `%s` is missing for storage `%s`; removing",
                location_name,
                candidate.uuid,
            )
            stale_placements = ImageStoragePlacement.delete().where(
                ImageStoragePlacement.storage == candidate,
                ImageStoragePlacement.location == location_row,
            )
            stale_placements.execute()
Пример #14
0
def test_logarchiving(app):
  """Archiving build logs must mark the build as archived and write a file to storage."""
  archiver = ArchiveBuildLogsWorker()
  fake_build_logs = Mock()
  fake_build_logs.get_log_entries = Mock(return_value=(1, [{'some': 'entry'}]))

  # A build that is ready for archiving.
  pending_build = model.create_build_for_testing()

  with patch('workers.buildlogsarchiver.buildlogsarchiver.build_logs', fake_build_logs):
    archiver._archive_redis_buildlogs()

  # Each of these build-logs calls must have happened exactly once.
  fake_build_logs.get_log_entries.assert_called_once()
  fake_build_logs.expire_status.assert_called_once()
  fake_build_logs.delete_log_entries.assert_called_once()

  # The build must now be flagged as archived.
  refreshed_build = model.get_build(pending_build.uuid)
  assert refreshed_build.logs_archived

  # The archive file must exist in storage.
  archive_path = 'logarchive/%s' % pending_build.uuid
  assert storage.exists(['local_us'], archive_path)
Пример #15
0
def test_images_shared_cas(default_tag_policy, initialized_db):
    """
    Two tags in one repository reference manifests whose blobs are *distinct image storage
    records* that share the same *CAS path*.

    Deleting the first tag should delete the first image, but not the file in storage, because
    the CAS path is still shared with the storage referenced by the second tag.
    """
    with assert_gc_integrity(expect_storage_removed=True):
        repository = create_repository()

        # Create two image storage records with the same content checksum.
        content = b"hello world"
        digest = "sha256:" + hashlib.sha256(content).hexdigest()
        preferred = storage.preferred_locations[0]
        storage.put_content({preferred}, storage.blob_path(digest), content)

        is1 = database.ImageStorage.create(content_checksum=digest)
        is2 = database.ImageStorage.create(content_checksum=digest)

        location = database.ImageStorageLocation.get(name=preferred)

        # Place both storage records in the same location.
        database.ImageStoragePlacement.create(location=location, storage=is1)
        database.ImageStoragePlacement.create(location=location, storage=is2)

        # Temp link so the blob is available.
        model.blob.store_blob_record_and_temp_link_in_repo(
            repository, digest, location, len(content), 120)

        # Ensure the CAS path exists.
        assert storage.exists({preferred}, storage.blob_path(digest))

        repo_ref = RepositoryReference.for_repo_obj(repository)

        # Store a manifest pointing to that path as `first`.
        builder = DockerSchema1ManifestBuilder(
            repository.namespace_user.username, repository.name, "first")
        builder.insert_layer(
            digest,
            json.dumps({
                "id": "i1",
            }),
        )
        manifest = builder.build(docker_v2_signing_key)
        registry_model.create_manifest_and_retarget_tag(repo_ref,
                                                        manifest,
                                                        "first",
                                                        storage,
                                                        raise_on_error=True)

        # Populate the legacy image rows for the manifest behind the `first` tag.
        tag_ref = registry_model.get_repo_tag(repo_ref, "first")
        manifest_ref = registry_model.get_manifest_for_tag(tag_ref)
        registry_model.populate_legacy_images_for_testing(
            manifest_ref, storage)

        # Store another as `second`.
        builder = DockerSchema1ManifestBuilder(
            repository.namespace_user.username, repository.name, "second")
        builder.insert_layer(
            digest,
            json.dumps({
                "id": "i2",
            }),
        )
        manifest = builder.build(docker_v2_signing_key)
        created, _ = registry_model.create_manifest_and_retarget_tag(
            repo_ref, manifest, "second", storage, raise_on_error=True)

        # Populate the legacy image rows for the manifest behind the `second` tag.
        tag_ref = registry_model.get_repo_tag(repo_ref, "second")
        manifest_ref = registry_model.get_manifest_for_tag(tag_ref)
        registry_model.populate_legacy_images_for_testing(
            manifest_ref, storage)

        # Manually retarget the second manifest's blob to the other storage row: if its
        # ManifestBlob currently references `is1` it is switched to `is2`, otherwise the row
        # referencing `is2` is switched to `is1`.
        try:
            second_blob = ManifestBlob.get(manifest=created._db_id, blob=is1)
            second_blob.blob = is2
            second_blob.save()
        except ManifestBlob.DoesNotExist:
            second_blob = ManifestBlob.get(manifest=created._db_id, blob=is2)
            second_blob.blob = is1
            second_blob.save()

        # Delete the temp reference.
        _delete_temp_links(repository)

        # Ensure the legacy images exist.
        assert_not_deleted(repository, "i1", "i2")

        # Delete the first tag; only the first image may be collected.
        delete_tag(repository, "first")
        assert_deleted(repository, "i1")
        assert_not_deleted(repository, "i2")

        # Ensure the CAS path still exists.
        assert storage.exists({preferred}, storage.blob_path(digest))