Пример #1
0
def test_pull(local_engine_empty, pg_repo_remote, download_all):
    """Clone a remote repository, add an image upstream and verify pull picks it up."""
    # The local engine connects to the remote engine directly, so the real
    # hostname works here. The clone procedure reuses that connection string,
    # hence an /etc/hosts entry maps the remote engine's hostname to localhost.
    clone(pg_repo_remote, local_repository=PG_MNT, download_all=download_all)
    PG_MNT.images.by_hash(pg_repo_remote.head.image_hash).checkout()

    new_image = _add_image_to_repo(pg_repo_remote)

    # The local checkout must be unaffected by the upstream commit.
    expected_before = [(1, "apple"), (2, "orange")]
    assert PG_MNT.run_sql("SELECT * FROM fruits") == expected_before

    # The upstream-only image isn't known locally yet.
    with pytest.raises(ImageNotFoundError):
        PG_MNT.images.by_hash(new_image.image_hash)

    # Pull and re-resolve the new image against the local repository.
    PG_MNT.pull()
    pulled_image = PG_MNT.images.by_hash(new_image.image_hash)

    # Checking out the pulled image yields the upstream data.
    pulled_image.checkout()

    assert PG_MNT.run_sql("SELECT * FROM fruits") == expected_before + [
        (3, "mayonnaise"),
    ]
    assert PG_MNT.head == pulled_image
Пример #2
0
def test_pull_download_error(local_engine_empty, unprivileged_pg_repo,
                             clean_minio, interrupted):
    """Fail or abort an object download during clone and check we can retry.

    The clone itself raises, but the repository metadata must still be
    registered locally (objects marked external, not downloaded) so that a
    subsequent clone can recover and finish the download.
    """
    # Same test backwards: if we're pulling and abort or fail the download, make sure we can
    # recover and retry pulling the repo.

    # Swap the S3 handler for one that fetches the first object and then fails
    # (or aborts, depending on `interrupted`).
    with patch.dict(
            "splitgraph.hooks.external_objects._EXTERNAL_OBJECT_HANDLERS",
        {"S3": _flaky_handler(interrupted)},
    ):
        # The exception itself isn't inspected -- the clone just has to fail
        # (so don't bind it to an unused variable).
        with pytest.raises(Exception):
            clone(unprivileged_pg_repo,
                  local_repository=PG_MNT,
                  download_all=True)

    # Check that the pull succeeded (repository registered locally) but the objects
    # are just marked as external, not downloaded
    assert repository_exists(PG_MNT)
    assert len(PG_MNT.objects.get_all_objects()) == 2
    # Only the first object made it down before the failure.
    assert len(PG_MNT.objects.get_downloaded_objects()) == 1
    assert len(
        PG_MNT.objects.get_external_object_locations(
            PG_MNT.objects.get_all_objects())) == 2
    assert (PG_MNT.run_sql(
        "SELECT COUNT(*) FROM splitgraph_meta.object_cache_status",
        return_shape=ResultShape.ONE_ONE,
    ) == 1)

    # Retrying the clone with a healthy handler completes the download.
    clone(unprivileged_pg_repo, local_repository=PG_MNT, download_all=True)
    assert len(PG_MNT.objects.get_all_objects()) == 2
    assert len(PG_MNT.objects.get_downloaded_objects()) == 2
    assert len(list(PG_MNT.images)) == 2
    assert (PG_MNT.run_sql(
        "SELECT COUNT(*) FROM splitgraph_meta.object_cache_status",
        return_shape=ResultShape.ONE_ONE,
    ) == 2)
Пример #3
0
def test_s3_presigned_url(local_engine_empty, unprivileged_pg_repo,
                          clean_minio):
    """Exercise the URL-signing stored procedure locally and via the remote engine."""
    clone(unprivileged_pg_repo, local_repository=PG_MNT, download_all=False)
    PG_MNT.images["latest"].checkout()
    PG_MNT.run_sql("INSERT INTO fruits VALUES (3, 'mayonnaise')")
    head = PG_MNT.commit()
    object_id = head.get_table("fruits").objects[0]

    s3_endpoint = "%s:%s" % (S3_HOST, S3_PORT)

    def _check_urls(url_batch):
        # One object requested -> one entry, each a triple of presigned URLs.
        assert len(url_batch) == 1
        assert len(url_batch[0]) == 3

    # Call the signer locally first (the tests currently have access to the
    # S3 credentials on the host they're running on).
    _check_urls(get_object_upload_urls(s3_endpoint, [object_id]))
    _check_urls(get_object_download_urls(s3_endpoint, [object_id]))

    # Now make sure the same signing procedure works on the remote machine.
    remote_urls = unprivileged_pg_repo.engine.run_sql(
        "SELECT * FROM splitgraph_api.get_object_upload_urls(%s, %s)",
        (s3_endpoint, [object_id]),
        return_shape=ResultShape.ONE_ONE,
    )
    _check_urls(remote_urls)
Пример #4
0
def test_pull_single_image(local_engine_empty, pg_repo_remote, download_all):
    """Clone/pull with single_image and check only the requested images arrive."""
    original_head = pg_repo_remote.head
    new_head = _add_image_to_repo(pg_repo_remote)

    original_head.tag("tag_1")
    new_head.tag("tag_2")
    pg_repo_remote.commit_engines()

    def _clone_original_head():
        # Clone just the original head image, addressed by hash prefix.
        clone(
            pg_repo_remote,
            local_repository=PG_MNT,
            download_all=download_all,
            single_image=original_head.image_hash[:12],
        )

    # The local repository starts out empty; the remote has three images.
    assert len(PG_MNT.images()) == 0
    assert len(PG_MNT.objects.get_downloaded_objects()) == 0
    assert len(pg_repo_remote.images()) == 3

    _clone_original_head()

    # Only one image was fetched, and we didn't pull tags belonging to
    # images outside of the clone.
    assert len(PG_MNT.images()) == 1
    assert PG_MNT.images()[0] == original_head
    assert PG_MNT.images["tag_1"] == original_head
    assert PG_MNT.images.by_tag("tag_2", raise_on_none=False) is None

    # Cloning the same single image again is a no-op.
    _clone_original_head()
    assert len(PG_MNT.images()) == 1

    # With eager downloads, only the original image's two objects came down.
    if download_all:
        assert len(PG_MNT.objects.get_downloaded_objects()) == 2

    # Pull the second image on its own.
    PG_MNT.pull(single_image=new_head.image_hash, download_all=download_all)
    assert len(PG_MNT.images()) == 2
    if download_all:
        assert len(PG_MNT.objects.get_downloaded_objects()) == 3

    assert PG_MNT.images["tag_2"] == new_head

    # Finally, pull the whole repository.
    PG_MNT.pull()
    assert len(PG_MNT.images()) == 3
Пример #5
0
def test_pulls_with_lazy_object_downloads(local_engine_empty, pg_repo_remote):
    """Objects are fetched lazily: only on checkout, and only the ones needed."""
    clone(pg_repo_remote, local_repository=PG_MNT, download_all=False)
    # Nothing should have been downloaded before the first checkout.
    assert not PG_MNT.objects.get_downloaded_objects()

    remote_head = pg_repo_remote.head

    PG_MNT.images.by_hash(remote_head.image_hash).checkout()
    # Checkout pulls the two original tables (fruits and vegetables) --
    # at this point everything the repo knows about is materialized.
    downloaded = PG_MNT.objects.get_downloaded_objects()
    assert len(downloaded) == 2
    assert sorted(downloaded) == sorted(PG_MNT.objects.get_all_objects())

    # In the meantime, fork the remote: two branches off of origin
    # (a total of 3 commits).
    pg_repo_remote.run_sql("INSERT INTO fruits VALUES (3, 'mayonnaise')")
    left = pg_repo_remote.commit()

    remote_head.checkout()
    pg_repo_remote.run_sql("INSERT INTO fruits VALUES (3, 'mustard')")
    right = pg_repo_remote.commit()

    # Pull from upstream without downloading objects.
    PG_MNT.pull(download_all=False)
    # Metadata for all four objects is known (three fruits versions plus the
    # original vegetables)...
    assert len(PG_MNT.objects.get_all_objects()) == 4
    # ...but physically we still only hold the original two objects.
    assert len(PG_MNT.objects.get_downloaded_objects()) == 2

    # "left" depends only on the root, so its checkout downloads just the new
    # fruits fragment: 2 versions of fruits + 1 vegetables.
    PG_MNT.images.by_hash(left.image_hash).checkout()
    assert len(PG_MNT.objects.get_downloaded_objects()) == 3

    # "right" fetches the remaining fragment; everything is now local.
    PG_MNT.images.by_hash(right.image_hash).checkout()
    assert len(PG_MNT.objects.get_downloaded_objects()) == 4
    assert sorted(PG_MNT.objects.get_downloaded_objects()) == sorted(
        PG_MNT.objects.get_all_objects())
Пример #6
0
def test_pull_tag_overwriting(local_engine_empty, pg_repo_remote):
    """Locally-known tags only move to new images when overwrite_tags is set."""
    head = pg_repo_remote.head
    head_1 = _add_image_to_repo(pg_repo_remote)

    head.tag("tag_1")
    head_1.tag("tag_2")
    head_1.tag("tag_3")
    pg_repo_remote.commit_engines()

    def _clone_head(**kwargs):
        # Clone just the original head image, addressed by hash prefix.
        clone(
            pg_repo_remote,
            local_repository=PG_MNT,
            single_image=head.image_hash[:12],
            **kwargs,
        )

    # Clone a single image: only its own tag comes along.
    _clone_head()
    assert len(PG_MNT.images()) == 1
    assert PG_MNT.images()[0] == head
    assert PG_MNT.images["tag_1"] == head
    assert PG_MNT.images.by_tag("tag_2", raise_on_none=False) is None

    # A repeated clone changes nothing.
    _clone_head()
    assert len(PG_MNT.images()) == 1
    assert PG_MNT.images["tag_1"] == head
    assert PG_MNT.images.by_tag("tag_2", raise_on_none=False) is None

    # Pull the remainder of the repo.
    PG_MNT.pull(single_image=head_1.image_hash)
    assert len(PG_MNT.images()) == 2
    assert PG_MNT.images["tag_2"] == head_1

    # Repoint tag_2 on the remote...
    head.tag("tag_2")
    pg_repo_remote.commit_engines()

    # ...and check a plain clone leaves the local tag_2 on head_1.
    _clone_head()
    assert PG_MNT.images["tag_1"] == head
    assert PG_MNT.images["tag_2"] == head_1
    assert PG_MNT.images["tag_3"] == head_1

    # With overwrite_tags, tag_2 does follow the remote.
    _clone_head(overwrite_tags=True)
    assert len(PG_MNT.images()) == 2
    assert PG_MNT.images["tag_1"] == head
    assert PG_MNT.images["tag_2"] == head
    assert PG_MNT.images["tag_3"] == head_1

    # Same story for pull: repoint tag_3 upstream...
    head.tag("tag_3")
    pg_repo_remote.commit_engines()

    # ...a plain pull doesn't move it...
    PG_MNT.pull()
    assert PG_MNT.images["tag_1"] == head
    assert PG_MNT.images["tag_2"] == head
    assert PG_MNT.images["tag_3"] == head_1

    # ...but a pull with overwrite_tags moves every tag.
    PG_MNT.pull(overwrite_tags=True)
    assert PG_MNT.images["tag_1"] == head
    assert PG_MNT.images["tag_2"] == head
    assert PG_MNT.images["tag_3"] == head
Пример #7
0
def test_push(local_engine_empty, pg_repo_remote):
    """Commit locally on a clone and push new images and objects upstream."""
    # Clone from the remote engine like in the previous test.
    clone(pg_repo_remote, local_repository=PG_MNT)

    remote_head = pg_repo_remote.head
    PG_MNT.images.by_hash(remote_head.image_hash).checkout()

    # Change our local copy and commit.
    PG_MNT.run_sql("INSERT INTO fruits VALUES (3, 'mayonnaise')")
    head_1 = PG_MNT.commit()

    # Push to remote.
    PG_MNT.push(remote_repository=pg_repo_remote)

    # The remote now holds the extra object...
    assert len(pg_repo_remote.objects.get_all_objects()) == 3

    # ...and the pushed image checks out there with the new row.
    pg_repo_remote.images.by_hash(head_1.image_hash).checkout()
    assert pg_repo_remote.run_sql("SELECT * FROM fruits") == [
        (1, "apple"),
        (2, "orange"),
        (3, "mayonnaise"),
    ]

    # Recommit the local image as a full snapshot and push that out too.
    head_2 = PG_MNT.commit(snap_only=True)
    PG_MNT.push(remote_repository=pg_repo_remote)
    snap_object = head_2.get_table("fruits").objects[0]
    assert snap_object in pg_repo_remote.objects.get_all_objects()

    # Recommit once more, rewriting the same object with a different sort
    # order: the object ID must stay stable.
    head_3 = PG_MNT.commit(snap_only=True,
                           in_fragment_order={"fruits": ["name"]},
                           overwrite=True)
    assert head_3.get_table("fruits").objects == head_2.get_table(
        "fruits").objects

    sort_order_query = SQL("SELECT fruit_id FROM {}.{}").format(
        Identifier(SPLITGRAPH_META_SCHEMA), Identifier(snap_object))

    # apple, mayonnaise, orange when the fragment is ordered by name.
    assert PG_MNT.run_sql(
        sort_order_query,
        return_shape=ResultShape.MANY_ONE,
    ) == [1, 3, 2]

    # Force push, overwriting both the object metadata and the object itself.
    PG_MNT.push(
        remote_repository=pg_repo_remote,
        single_image=head_3.image_hash,
        overwrite_objects=True,
        reupload_objects=True,
    )

    # The reordered object replaced the old one on the remote as well.
    assert pg_repo_remote.run_sql(
        sort_order_query,
        return_shape=ResultShape.MANY_ONE,
    ) == [1, 3, 2]
Пример #8
0
def test_s3_push_pull(local_engine_empty, unprivileged_pg_repo,
                      pg_repo_remote_registry, clean_minio):
    """Push/pull with objects uploaded to external (S3) storage instead of the remote DB."""
    # In the beginning the registry knows about two objects, both external.
    objects = pg_repo_remote_registry.objects.get_all_objects()
    assert len(objects) == 2
    assert len(
        unprivileged_pg_repo.objects.get_external_object_locations(
            list(objects))) == 2

    clone(unprivileged_pg_repo, local_repository=PG_MNT, download_all=False)

    # Two sibling commits on top of the cloned head.
    head = PG_MNT.images["latest"]
    head.checkout()
    PG_MNT.run_sql("INSERT INTO fruits VALUES (3, 'mayonnaise')")
    left = PG_MNT.commit()
    head.checkout()
    PG_MNT.run_sql("INSERT INTO fruits VALUES (3, 'mustard')")
    right = PG_MNT.commit()

    # Push to origin, uploading the actual objects to S3 this time.
    PG_MNT.push(remote_repository=unprivileged_pg_repo,
                handler="S3",
                handler_options={})

    # Every local object is registered remotely...
    objects = pg_repo_remote_registry.objects.get_all_objects()
    local_objects = PG_MNT.objects.get_all_objects()
    assert all(o in objects for o in local_objects)
    # ...as a URL-backed external object, not a physical copy on the remote.
    ext_objects_orig = PG_MNT.objects.get_external_object_locations(
        list(objects))
    ext_objects_pull = unprivileged_pg_repo.objects.get_external_object_locations(
        list(objects))
    assert len(ext_objects_orig) == 4
    assert all(e in ext_objects_pull for e in ext_objects_orig)

    # Destroy the pulled mountpoint and drop all leftover physical objects.
    assert len(PG_MNT.objects.get_downloaded_objects()) == 4
    PG_MNT.delete()
    PG_MNT.objects.cleanup()
    assert len(PG_MNT.objects.get_downloaded_objects()) == 0

    # Re-clone lazily: nothing may be downloaded up front.
    clone(unprivileged_pg_repo, local_repository=PG_MNT, download_all=False)
    assert len(PG_MNT.objects.get_downloaded_objects()) == 0

    # "left" only depends on the root, so its checkout fetches just the new
    # fruits fragment: 2 versions of fruits + 1 vegetables.
    left.checkout()
    assert len(PG_MNT.objects.get_downloaded_objects()) == 3

    # "right" pulls in the final fragment -- only now is everything local.
    right.checkout()
    assert len(PG_MNT.objects.get_downloaded_objects()) == 4
    assert sorted(PG_MNT.objects.get_downloaded_objects()) == sorted(
        PG_MNT.objects.get_all_objects())
Пример #9
0
def test_push_upload_error(local_engine_empty, unprivileged_pg_repo,
                           pg_repo_remote_registry, clean_minio, interrupted):
    """A failed or interrupted object upload fails the push without corrupting the registry.

    After the failed push, the new image must not be registered upstream; a
    subsequent normal push must then succeed and register everything.
    """
    clone(unprivileged_pg_repo, local_repository=PG_MNT, download_all=False)
    PG_MNT.images["latest"].checkout()
    PG_MNT.run_sql("INSERT INTO fruits VALUES (3, 'mayonnaise')")
    PG_MNT.run_sql("INSERT INTO vegetables VALUES (3, 'cucumber')")
    head = PG_MNT.commit()

    # If the upload fails for whatever reason (e.g. Minio is inaccessible or the upload was aborted),
    # the whole push fails rather than leaving the registry in an inconsistent state.
    with patch.dict(
            "splitgraph.hooks.external_objects._EXTERNAL_OBJECT_HANDLERS",
        {"S3": _flaky_handler(incomplete=interrupted)},
    ):
        # The exception itself isn't inspected -- the push just has to fail
        # (so don't bind it to an unused variable).
        with pytest.raises(Exception):
            PG_MNT.push(remote_repository=unprivileged_pg_repo,
                        handler="S3",
                        handler_options={})

    # The new image must not have made it upstream.
    assert head not in unprivileged_pg_repo.images
    # Only the two original tables from the original image upstream
    assert (pg_repo_remote_registry.engine.run_sql(
        "SELECT COUNT(*) FROM splitgraph_meta.tables",
        return_shape=ResultShape.ONE_ONE) == 2)

    # Registry had 2 objects before the upload -- if we interrupted the upload,
    # we only managed to upload the first object that was registered (even if the image
    # wasn't).

    expected_object_count = 3 if interrupted else 2

    assert len(pg_repo_remote_registry.objects.get_all_objects()
               ) == expected_object_count

    # Two new objects not registered remotely since the upload failed
    assert (local_engine_empty.run_sql(
        "SELECT COUNT(*) FROM splitgraph_meta.object_locations",
        return_shape=ResultShape.ONE_ONE,
    ) == expected_object_count)
    assert (pg_repo_remote_registry.engine.run_sql(
        "SELECT COUNT(*) FROM splitgraph_meta.object_locations",
        return_shape=ResultShape.ONE_ONE,
    ) == expected_object_count)

    # Now do the push normally and check the image exists upstream.
    PG_MNT.push(remote_repository=unprivileged_pg_repo,
                handler="S3",
                handler_options={})

    assert any(i.image_hash == head.image_hash
               for i in unprivileged_pg_repo.images)

    assert len(pg_repo_remote_registry.objects.get_all_objects()) == 4

    assert (local_engine_empty.run_sql(
        "SELECT COUNT(*) FROM splitgraph_meta.object_locations",
        return_shape=ResultShape.ONE_ONE,
    ) == 4)
    assert (pg_repo_remote_registry.engine.run_sql(
        "SELECT COUNT(*) FROM splitgraph_meta.object_locations",
        return_shape=ResultShape.ONE_ONE,
    ) == 4)