Example #1
def test_base_fragment_reused_chunking(local_engine_empty):
    # Check that if we split a table into chunks and some chunks are the same, they get assigned to the same objects.
    OUTPUT.init()
    base = OUTPUT.head
    _make_test_table(OUTPUT)
    OUTPUT.commit(chunk_size=5)
    table_1 = OUTPUT.head.get_table("test")
    # Table 1 produced 3 objects
    assert len(OUTPUT.objects.get_all_objects()) == 3

    # All chunks are the same
    base.checkout()
    _make_test_table(OUTPUT)
    OUTPUT.commit(chunk_size=5)
    table_2 = OUTPUT.head.get_table("test")
    assert len(OUTPUT.objects.get_all_objects()) == 3
    assert table_1.objects == table_2.objects

    # Change a row in the middle chunk so that it differs. The change gets conflated with the rest
    # of the chunk, so it isn't recorded as a separate update fragment.
    base.checkout()
    _make_test_table(OUTPUT)
    OUTPUT.run_sql(
        "UPDATE test SET value_1 = 'UPDATED', value_2 = 42 WHERE key = 7")
    OUTPUT.commit(chunk_size=5)
    table_3 = OUTPUT.head.get_table("test")
    assert len(OUTPUT.objects.get_all_objects()) == 4
    # Table 3 reused the first and the last object but created a new one for the middle fragment.
    assert len(table_3.objects) == 3
    assert table_3.objects[0] == table_1.objects[0]
    assert table_3.objects[1] != table_1.objects[1]
    assert table_3.objects[2] == table_1.objects[2]
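
The _make_test_table helper isn't shown in these examples; a hypothetical stand-in consistent with the assertions above (a key column plus two value columns, with 15 rows so that chunk_size=5 yields three fragments) could look like this:

def _make_test_table(repo):
    # Hypothetical stand-in, not the original helper: column names match the UPDATE
    # statement above and 15 rows give three 5-row chunks.
    repo.run_sql(
        "CREATE TABLE test (key INTEGER PRIMARY KEY, value_1 VARCHAR, value_2 INTEGER)")
    for i in range(1, 16):
        repo.run_sql("INSERT INTO test VALUES (%s, %s, %s)",
                     (i, "value_%d" % i, i * 2))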
Example #2
def test_import_bare(pg_repo_local):
    # Check import without checking anything out, just by manipulating metadata and running
    # layered queries (LQs) against source images.

    # Create a new schema and import 'fruits'
    OUTPUT.init()
    # Make sure the existing table is preserved.
    OUTPUT.run_sql("CREATE TABLE sentinel (key INTEGER)")
    OUTPUT.commit()
    pg_repo_local.uncheckout()
    OUTPUT.uncheckout()

    OUTPUT.import_tables(
        tables=["imported_fruits"],
        source_repository=pg_repo_local,
        image_hash=pg_repo_local.images["latest"].image_hash,
        source_tables=["SELECT * FROM fruits WHERE fruit_id = 1"],
        parent_hash=OUTPUT.images["latest"].image_hash,
        do_checkout=False,
        table_queries=[True],
    )

    assert OUTPUT.head is None
    assert pg_repo_local.head is None

    assert sorted(OUTPUT.images["latest"].get_tables()) == [
        "imported_fruits", "sentinel"
    ]
    assert list(OUTPUT.images["latest"].get_table("imported_fruits").query(
        columns=["name"], quals=[])) == [{
            "name": "apple"
        }]
Example #3
def test_splitfile_end_to_end_with_uploading(local_engine_empty, remote_engine,
                                             pg_repo_remote_multitag,
                                             mg_repo_remote, clean_minio):
    # An end-to-end test:
    #   * Create a derived dataset from some tables imported from the remote engine
    #   * Push it back to the remote engine, uploading all objects to S3 (instead of the remote engine itself)
    #   * Delete everything from pgcache
    #   * Run another splitfile that depends on the just-pushed dataset (and does lazy checkouts to
    #     get the required tables).

    # Do the same setting up first and run the splitfile against the remote data.
    execute_commands(load_splitfile("import_remote_multiple.splitfile"),
                     params={"TAG": "v1"},
                     output=OUTPUT)

    remote_output = Repository(OUTPUT.namespace, OUTPUT.repository,
                               remote_engine)

    # Push with upload
    OUTPUT.push(remote_repository=remote_output,
                handler="S3",
                handler_options={})
    # Unmount everything locally and clean up
    for mountpoint, _ in get_current_repositories(local_engine_empty):
        mountpoint.delete()
    OUTPUT.objects.cleanup()

    stage_2 = R("output_stage_2")
    execute_commands(
        load_splitfile("import_from_preuploaded_remote.splitfile"),
        output=stage_2)

    assert stage_2.run_sql("SELECT id, name, fruit, vegetable FROM diet") == [
        (2, "James", "orange", "carrot")
    ]
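
R here is assumed to be a test-suite shorthand for building a Repository handle on the local engine from a "namespace/repository" string; a hypothetical equivalent:

def R(repo_string):
    # Hypothetical shorthand (an assumption, not the original helper): split off an
    # optional "namespace/" prefix and build a Repository on the default local engine.
    namespace, _, repository = repo_string.rpartition("/")
    return Repository(namespace, repository)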
Example #4
def test_splitfile_rebuild_update(local_engine_empty, pg_repo_remote_multitag):
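    # Build an image from a splitfile via the CLI, then rebuild it against another tag of the
    # source repository and check the provenance and the set of images.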
    runner = CliRunner()

    result = runner.invoke(
        build_c,
        [
            RESOURCES + "import_remote_multiple.splitfile", "-a", "TAG", "v1",
            "-o", "output"
        ],
    )
    assert result.exit_code == 0

    # Rerun the output:latest against v2 of the test/pg_mount
    result = runner.invoke(rebuild_c,
                           ["output:latest", "--against", "test/pg_mount:v2"])
    output_v2 = OUTPUT.head
    assert result.exit_code == 0
    v2 = pg_repo_remote_multitag.images["v2"]
    assert output_v2.provenance() == [(pg_repo_remote_multitag, v2.image_hash)]

    # Now rerun the output:latest against the latest version of everything.
    # In this case, this should all resolve to the same version of test/pg_mount (v2) and not produce
    # any extra commits.
    curr_commits = OUTPUT.images()
    result = runner.invoke(rebuild_c, ["output:latest", "-u"])
    assert result.exit_code == 0
    assert output_v2 == OUTPUT.head
    assert OUTPUT.images() == curr_commits
Example #5
def test_commandline_show_empty_image(local_engine_empty):
    # Check that size calculations etc. in an empty image don't cause errors.
    runner = CliRunner()
    OUTPUT.init()
    assert OUTPUT.images["latest"].get_size() == 0
    result = runner.invoke(show_c, [str(OUTPUT) + ":" + "000000000000"],
                           catch_exceptions=False)
    assert "Size: 0.00 B" in result.output
Example #6
def _setup_dataset():
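    # Shared helper: build a two-image repository with a small 'test' table and return the latest image.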
    OUTPUT.init()
    OUTPUT.run_sql("""CREATE TABLE test (id integer, name varchar);
        INSERT INTO test VALUES (1, 'test')""")
    OUTPUT.commit()
    OUTPUT.run_sql("INSERT INTO test VALUES (2, 'test2')")
    return OUTPUT.commit()
Example #7
def test_from_remote(local_engine_empty, pg_repo_remote_multitag):
    # Test running commands that base new datasets on a remote repository.
    execute_commands(load_splitfile("from_remote.splitfile"),
                     params={"TAG": "v1"},
                     output=OUTPUT)

    new_head = OUTPUT.head
    parent = OUTPUT.images.by_hash(new_head.parent_id)
    # Go back to the parent: the two source tables should exist there
    parent.checkout()
    assert OUTPUT.engine.table_exists(OUTPUT.to_schema(), "fruits")
    assert OUTPUT.engine.table_exists(OUTPUT.to_schema(), "vegetables")
    assert not OUTPUT.engine.table_exists(OUTPUT.to_schema(), "join_table")

    new_head.checkout()
    assert OUTPUT.engine.table_exists(OUTPUT.to_schema(), "fruits")
    assert OUTPUT.engine.table_exists(OUTPUT.to_schema(), "vegetables")
    assert OUTPUT.run_sql("SELECT * FROM join_table") == [
        (1, "apple", "potato"),
        (2, "orange", "carrot"),
    ]

    # Now run the same splitfile but against v2 of the remote (where row 1 has been removed from the fruits table).
    # First, delete the output mountpoint (otherwise the executor tries to fetch commit 0000 from it,
    # which doesn't exist).
    OUTPUT.delete()
    execute_commands(load_splitfile("from_remote.splitfile"),
                     params={"TAG": "v2"},
                     output=OUTPUT)

    assert OUTPUT.run_sql("SELECT * FROM join_table") == [(2, "orange",
                                                           "carrot")]
Example #8
def test_local_import_splitfile(pg_repo_local):
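    # Run a splitfile that imports a table from a local repository: the imported table should
    # only exist in the new image and the source table should never appear in the output.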
    execute_commands(load_splitfile("import_local.splitfile"), output=OUTPUT)
    head = OUTPUT.head
    old_head = head.parent_id

    OUTPUT.images.by_hash(old_head).checkout()
    assert not OUTPUT.engine.table_exists(OUTPUT.to_schema(), "my_fruits")
    assert not OUTPUT.engine.table_exists(OUTPUT.to_schema(), "fruits")

    head.checkout()
    assert OUTPUT.engine.table_exists(OUTPUT.to_schema(), "my_fruits")
    assert not OUTPUT.engine.table_exists(OUTPUT.to_schema(), "fruits")
Example #9
def test_update_without_import_splitfile(pg_repo_local):
    # Test that the correct commits are produced by executing a splitfile (both against newly created
    # and already existing tables on an existing mountpoint).
    execute_commands(load_splitfile("update_without_import.splitfile"),
                     output=OUTPUT)
    log = OUTPUT.head.get_log()

    log[1].checkout()
    assert OUTPUT.run_sql("SELECT * FROM my_fruits") == []

    log[0].checkout()
    assert OUTPUT.run_sql("SELECT * FROM my_fruits") == [(1, "pineapple")]
Example #10
def test_splitfile_sql_failure(local_engine_empty, pg_repo_remote_multitag):
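    # A failing SQL statement should propagate the error to the caller while keeping the
    # images produced by the earlier steps of the splitfile.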
    assert len(OUTPUT.images()) == 0
    assert _get_table_count(OUTPUT) == 0

    with pytest.raises(psycopg2.errors.UndefinedTable) as e:
        execute_commands(load_splitfile("import_remote_broken_stage_2.splitfile"), output=OUTPUT)
    assert 'relation "nonexistent_fruits_table" does not exist' in str(e.value)

    # Check the execution created the first dummy (000...) image and the second image
    # with IMPORT results
    assert len(OUTPUT.images()) == 2
    assert _get_table_count(OUTPUT) == 2
    assert sorted(OUTPUT.images["latest"].get_tables()) == ["my_fruits", "vegetables"]
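
The _get_table_count helper isn't shown here; a plausible sketch (an assumption, not the original implementation) that counts the table entries registered for the repository in the metadata schema:

def _get_table_count(repo):
    # Hypothetical sketch: count table rows recorded for this repository in the
    # splitgraph_meta.tables metadata table (assumed schema and column names).
    return repo.engine.run_sql(
        "SELECT COUNT(1) FROM splitgraph_meta.tables"
        " WHERE namespace = %s AND repository = %s",
        (repo.namespace, repo.repository))[0][0]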
Example #11
def test_from_remote_hash(local_engine_empty, pg_repo_remote):
    head = pg_repo_remote.head.image_hash
    # Test running commands that base new datasets on a remote repository.
    execute_commands(load_splitfile("from_remote.splitfile"),
                     params={"TAG": head[:10]},
                     output=OUTPUT)

    assert OUTPUT.engine.table_exists(OUTPUT.to_schema(), "fruits")
    assert OUTPUT.engine.table_exists(OUTPUT.to_schema(), "vegetables")
    assert OUTPUT.run_sql("SELECT * FROM join_table") == [
        (1, "apple", "potato"),
        (2, "orange", "carrot"),
    ]
Example #12
def test_basic_splitfile(pg_repo_local):
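    # Run a simple splitfile and walk the resulting image log, checking the contents of
    # my_fruits at every step.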
    execute_commands(load_splitfile("create_table.splitfile"), output=OUTPUT)
    log = list(reversed(OUTPUT.head.get_log()))

    log[1].checkout()
    assert OUTPUT.run_sql("SELECT * FROM my_fruits") == []

    log[2].checkout()
    assert OUTPUT.run_sql("SELECT * FROM my_fruits") == [(1, "pineapple")]

    log[3].checkout()
    assert OUTPUT.run_sql("SELECT * FROM my_fruits") == [(1, "pineapple"),
                                                         (2, "banana")]
Example #13
def test_splitfile_cached(pg_repo_local):
    # Check that no new commits/snaps are created if we rerun the same splitfile
    execute_commands(
        load_splitfile("import_local_multiple_with_queries.splitfile"),
        output=OUTPUT)
    images = OUTPUT.images()
    assert len(images) == 4

    execute_commands(
        load_splitfile("import_local_multiple_with_queries.splitfile"),
        output=OUTPUT)
    new_images = OUTPUT.images()
    assert new_images == images
Example #14
def test_import_and_update(local_engine_empty, unprivileged_pg_repo):
    OUTPUT.init()
    head = OUTPUT.head
    remote_head = unprivileged_pg_repo.images["latest"]
    # Import the 'fruits' table from the origin.
    import_table_from_remote(unprivileged_pg_repo, ["fruits"],
                             remote_head.image_hash,
                             OUTPUT,
                             target_tables=[])
    new_head = OUTPUT.head

    OUTPUT.run_sql("INSERT INTO fruits VALUES (3, 'mayonnaise')")
    new_head_2 = OUTPUT.commit()

    head.checkout()
    assert not OUTPUT.engine.table_exists(OUTPUT.to_schema(), "fruits")

    new_head.checkout()
    assert OUTPUT.run_sql("SELECT * FROM fruits") == [(1, "apple"),
                                                      (2, "orange")]

    new_head_2.checkout()
    assert OUTPUT.run_sql("SELECT * FROM fruits") == [
        (1, "apple"),
        (2, "orange"),
        (3, "mayonnaise"),
    ]
Example #15
def test_import_basic(pg_repo_local):
    # Create a new schema and import 'fruits' from the mounted PG table.
    OUTPUT.init()
    head = OUTPUT.head

    OUTPUT.import_tables(tables=["imported_fruits"],
                         source_repository=pg_repo_local,
                         source_tables=["fruits"])

    assert OUTPUT.run_sql("SELECT * FROM imported_fruits"
                          ) == pg_repo_local.run_sql("SELECT * FROM fruits")
    new_head = OUTPUT.head

    assert new_head != head
    assert new_head.parent_id == head.image_hash
Example #16
def test_import_multiple_tables(pg_repo_local):
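    # Passing empty source_tables/tables lists imports every table from the source repository.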
    OUTPUT.init()
    head = OUTPUT.head
    OUTPUT.import_tables(tables=[],
                         source_repository=pg_repo_local,
                         source_tables=[])

    for table_name in ["fruits", "vegetables"]:
        assert OUTPUT.run_sql("SELECT * FROM %s" %
                              table_name) == pg_repo_local.run_sql(
                                  "SELECT * FROM %s" % table_name)

    new_head = OUTPUT.head
    assert new_head != head
    assert new_head.parent_id == head.image_hash
Example #17
def test_import_all(local_engine_empty):
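    # Import all tables from a mounted source via a splitfile and check that they only exist
    # in the new image.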
    execute_commands(load_splitfile("import_all_from_mounted.splitfile"),
                     output=OUTPUT)

    head = OUTPUT.head
    old_head = OUTPUT.images.by_hash(head.parent_id)

    old_head.checkout()
    tables = ["vegetables", "fruits"]
    contents = [[(1, "potato"), (2, "carrot")], [(1, "apple"), (2, "orange")]]
    for t in tables:
        assert not OUTPUT.engine.table_exists(OUTPUT.to_schema(), t)

    head.checkout()
    for t, c in zip(tables, contents):
        assert OUTPUT.run_sql("SELECT * FROM %s" % t) == c
Example #18
def test_splitfile_object_download_failure(local_engine_empty, pg_repo_remote_multitag):
    # Simulate an object download failure (one that happens inside the engine during IMPORT
    # execution) and check that it propagates to the caller without leaving the engine in an
    # inconsistent state.

    object_id = pg_repo_remote_multitag.images["v1"].get_table("fruits").objects[0]
    assert object_id == "o0e742bd2ea4927f5193a2c68f8d4c51ea018b1ef3e3005a50727147d2cf57b"
    tmp_object_id = "o" + "0" * 62

    pg_repo_remote_multitag.engine.run_sql(
        SQL("ALTER TABLE splitgraph_meta.{} RENAME TO {}").format(
            Identifier(object_id), Identifier(tmp_object_id)
        )
    )

    assert len(OUTPUT.images()) == 0
    assert _get_table_count(OUTPUT) == 0

    with pytest.raises(ObjectCacheError) as e:
        execute_commands(
            load_splitfile("import_remote_multiple.splitfile"), params={"TAG": "v1"}, output=OUTPUT
        )
    assert "Missing 1 object (%s)" % object_id in str(e.value)

    # Check the execution didn't create the image
    assert len(OUTPUT.images()) == 0
    assert _get_table_count(OUTPUT) == 0

    # Rename the object back and retry the Splitfile
    pg_repo_remote_multitag.engine.run_sql(
        SQL("ALTER TABLE splitgraph_meta.{} RENAME TO {}").format(
            Identifier(tmp_object_id), Identifier(object_id)
        )
    )

    execute_commands(
        load_splitfile("import_remote_multiple.splitfile"), params={"TAG": "v1"}, output=OUTPUT
    )
    OUTPUT.head.checkout()
    assert OUTPUT.run_sql("SELECT id, fruit, vegetable FROM join_table") == [
        (1, "apple", "potato"),
        (2, "orange", "carrot"),
    ]

    assert len(OUTPUT.images()) == 3
    # 2 tables in the first non-empty image, 3 tables in the second image
    # (previous 2 + joined data).
    assert _get_table_count(OUTPUT) == 5
Example #19
def test_splitfile_remote(local_engine_empty, pg_repo_remote_multitag):
    # We use the v1 tag when importing from the remote, so fruit_id = 1 still exists there.
    execute_commands(load_splitfile("import_remote_multiple.splitfile"),
                     params={"TAG": "v1"},
                     output=OUTPUT)
    assert OUTPUT.run_sql("SELECT id, fruit, vegetable FROM join_table") == [
        (1, "apple", "potato"),
        (2, "orange", "carrot"),
    ]

    # Now run the commands against v2 and make sure the fruit_id = 1 has disappeared from the output.
    execute_commands(load_splitfile("import_remote_multiple.splitfile"),
                     params={"TAG": "v2"},
                     output=OUTPUT)
    assert OUTPUT.run_sql("SELECT id, fruit, vegetable FROM join_table") == [
        (2, "orange", "carrot")
    ]
Example #20
def test_splitfile_inline_sql(readonly_pg_repo, pg_repo_local):
    # Test SQL commands accessing repos directly -- join a remote repo with
    # some local data.

    prepare_lq_repo(pg_repo_local, commit_after_every=False, include_pk=True)
    pg_repo_local.head.tag("v2")

    execute_commands(
        load_splitfile("inline_sql.splitfile"),
        output=OUTPUT,
    )

    new_head = OUTPUT.head
    new_head.checkout()
    assert new_head.get_tables() == ["balanced_diet"]
    assert OUTPUT.run_sql("SELECT * FROM balanced_diet") == [
        (1, "apple", None, "potato"),
        (2, "orange", datetime.datetime(2019, 1, 1, 12, 0), "carrot"),
    ]

    local_repo_head = pg_repo_local.head.image_hash
    other_repo_head = readonly_pg_repo.images["latest"].image_hash

    assert new_head.provenance_data == [
        {
            "sources": [
                {
                    "source": "pg_mount",
                    "source_hash": other_repo_head,
                    "source_namespace": "otheruser",
                },
                {
                    "source": "pg_mount",
                    "source_hash": local_repo_head,
                    "source_namespace": "test"
                },
            ],
            "sql": ("CREATE TABLE balanced_diet\n"
                    "  AS SELECT fruits.fruit_id AS id\n"
                    "          , fruits.name AS fruit\n"
                    "          , my_fruits.timestamp AS timestamp\n"
                    "          , vegetables.name AS vegetable\n"
                    "     FROM "
                    '"otheruser/pg_mount:{0}".fruits '
                    "AS fruits\n"
                    "          INNER JOIN "
                    '"otheruser/pg_mount:{0}".vegetables '
                    "AS vegetables ON fruits.fruit_id = vegetable_id\n"
                    "          LEFT JOIN "
                    '"test/pg_mount:{1}".fruits '
                    "AS my_fruits ON my_fruits.fruit_id = fruits.fruit_id;\n"
                    "\n"
                    "ALTER TABLE balanced_diet ADD PRIMARY KEY (id)").format(
                        other_repo_head, local_repo_head),
            "type":
            "SQL",
        },
    ]
Example #21
def test_import_from_remote(local_engine_empty, unprivileged_pg_repo):
    # Start with a clean repo -- add a table to output to see if it's preserved.
    head = _setup_dataset()

    local_objects = OUTPUT.objects

    assert len(local_objects.get_downloaded_objects()) == 2
    assert len(local_objects.get_all_objects()) == 2
    assert local_engine_empty.get_all_tables(OUTPUT.to_schema()) == ["test"]

    # Import the 'fruits' table from the origin.
    remote_head = unprivileged_pg_repo.images["latest"]
    import_table_from_remote(unprivileged_pg_repo, ["fruits"],
                             remote_head.image_hash,
                             OUTPUT,
                             target_tables=[])
    new_head = OUTPUT.head

    # Check that the table now exists in the output, is committed and there's no trace of the cloned repo.
    # Also clean up the unused objects to make sure that the newly cloned table is still recorded.
    assert sorted(local_engine_empty.get_all_tables(
        OUTPUT.to_schema())) == ["fruits", "test"]
    local_objects.cleanup()
    assert len(get_current_repositories(local_engine_empty)) == 1
    head.checkout()
    assert local_engine_empty.table_exists(OUTPUT.to_schema(), "test")
    assert not local_engine_empty.table_exists(OUTPUT.to_schema(), "fruits")

    new_head.checkout()
    assert local_engine_empty.table_exists(OUTPUT.to_schema(), "test")
    assert local_engine_empty.table_exists(OUTPUT.to_schema(), "fruits")

    assert OUTPUT.run_sql("SELECT * FROM fruits") == [(1, "apple"),
                                                      (2, "orange")]
Example #22
def test_bloom_index_post_factum(local_engine_empty):
    OUTPUT.init()
    OUTPUT.run_sql(
        "CREATE TABLE test (key INTEGER PRIMARY KEY, value_1 TIMESTAMP)")
    for i in range(50):
        OUTPUT.run_sql("INSERT INTO test VALUES (%s, %s)",
                       (i + 1, dt(2015, 1, 1) + timedelta(days=i)))
    head = OUTPUT.commit()

    # Reindex the table after committing it and check that object metadata gets overwritten.
    head.get_table("test").reindex(
        extra_indexes={"bloom": {
            "value_1": {
                "probability": 0.01
            }
        }})

    obj = head.get_table("test").objects[0]
    object_index = OUTPUT.objects.get_object_meta([obj])[obj].object_index
    assert "bloom" in object_index

    # Run a snippet from the previous test to check the bloom metadata is valid.
    objects = head.get_table("test").objects
    for i in range(0, 50, 5):
        assert (filter_bloom_index(OUTPUT.engine, objects, [[
            ("value_1", "=", dt(2015, 1, 1) + timedelta(days=i))
        ]]) == objects)
Example #23
def test_bloom_index_datetime(local_engine_empty):
    OUTPUT.init()
    OUTPUT.run_sql(
        "CREATE TABLE test (key INTEGER PRIMARY KEY, value_1 TIMESTAMP)")
    for i in range(50):
        OUTPUT.run_sql("INSERT INTO test VALUES (%s, %s)",
                       (i + 1, dt(2015, 1, 1) + timedelta(days=i)))
    head = OUTPUT.commit(
        extra_indexes={"test": {
            "bloom": {
                "value_1": {
                    "probability": 0.01
                }
            }
        }})
    objects = head.get_table("test").objects

    # Datetimes are supported in the bloom index if they're passed in as actual datetime
    # objects (which Multicorn does).

    # Spot check some dates that exist...
    for i in range(0, 50, 5):
        assert (filter_bloom_index(OUTPUT.engine, objects, [[
            ("value_1", "=", dt(2015, 1, 1) + timedelta(days=i))
        ]]) == objects)

    # ...and some that don't.
    assert (filter_bloom_index(
        OUTPUT.engine, objects,
        [[("value_1", "=", dt(2015, 1, 1) + timedelta(days=55))]]) == [])

    # They also work if passed in as ISO strings with space as a separator.
    assert (filter_bloom_index(
        OUTPUT.engine, objects,
        [[("value_1", "=", "2015-01-01 00:00:00")]]) == objects)
Example #24
def test_splitfile_remote_hash(local_engine_empty, pg_repo_remote):
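    # Splitfiles can pin the source image by a hash prefix instead of a tag.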
    head = pg_repo_remote.head.image_hash
    execute_commands(load_splitfile("import_remote_multiple.splitfile"),
                     params={"TAG": head[:10]},
                     output=OUTPUT)
    assert OUTPUT.run_sql(
        "SELECT id, fruit, vegetable FROM output.join_table") == [
            (1, "apple", "potato"),
            (2, "orange", "carrot"),
        ]
Example #25
def test_splitfile(local_engine_empty, pg_repo_remote):
    runner = CliRunner()

    result = runner.invoke(
        build_c,
        [
            RESOURCES + "import_remote_multiple.splitfile",
            "-a",
            "TAG",
            "latest",
            "-o",
            "output",
        ],
    )
    assert result.exit_code == 0
    assert OUTPUT.run_sql("SELECT id, fruit, vegetable FROM join_table") == [
        (1, "apple", "potato"),
        (2, "orange", "carrot"),
    ]

    # Test the sgr provenance command. First, just list the dependencies of the new image.
    result = runner.invoke(provenance_c, ["output:latest"])
    assert "test/pg_mount:%s" % pg_repo_remote.images[
        "latest"].image_hash in result.output

    # Second, output the full splitfile (-f)
    result = runner.invoke(provenance_c, ["output:latest", "-f"])
    assert ("FROM test/pg_mount:%s IMPORT" %
            pg_repo_remote.images["latest"].image_hash in result.output)
    assert "SQL {CREATE TABLE join_table" in result.output

    # Test reverse dependencies
    # We're looking at test/pg_mount on the local engine which doesn't exist -- this should fail.
    result = runner.invoke(
        dependents_c,
        [
            "test/pg_mount:%s" % pg_repo_remote.images["latest"].image_hash,
        ],
    )
    assert result.exit_code == 1

    # Now look at test/pg_mount on the remote and look for dependents on the local engine.
    result = runner.invoke(
        dependents_c,
        [
            "test/pg_mount:%s" % pg_repo_remote.images["latest"].image_hash,
            "--dependents-on",
            "LOCAL",
            "--source-on",
            pg_repo_remote.engine.name,
        ],
    )
    assert result.exit_code == 0
    assert "is depended on by" in result.output
    assert "%s:%s" % (OUTPUT, OUTPUT.head.image_hash) in result.output
Example #26
def test_splitfile_schema_changes(pg_repo_local, mg_repo_local):
    execute_commands(load_splitfile("schema_changes.splitfile"), output=OUTPUT)
    old_output_head = OUTPUT.head

    # Then, alter the dataset and rerun the splitfile.
    pg_repo_local.run_sql("INSERT INTO fruits VALUES (12, 'mayonnaise')")
    pg_repo_local.commit()
    execute_commands(load_splitfile("schema_changes.splitfile"), output=OUTPUT)
    new_output_head = OUTPUT.head

    old_output_head.checkout()
    assert OUTPUT.run_sql("SELECT * FROM spirit_fruits") == [("James",
                                                              "orange", 12)]

    new_output_head.checkout()
    # Mayonnaise joined with Alex, ID 12 + 10 = 22.
    assert OUTPUT.run_sql("SELECT * FROM spirit_fruits") == [
        ("James", "orange", 12),
        ("Alex", "mayonnaise", 22),
    ]
Example #27
def test_splitfile_with_external_sql(readonly_pg_repo):

    # Tests run from the repository root, so we pass the path to the SQL file into the splitfile manually.
    execute_commands(
        load_splitfile("external_sql.splitfile"),
        params={"EXTERNAL_SQL_FILE": RESOURCES + "external_sql.sql"},
        output=OUTPUT,
    )

    assert OUTPUT.run_sql("SELECT id, fruit, vegetable FROM join_table") == [
        (1, "apple", "potato"),
        (2, "orange", "carrot"),
    ]
Example #28
def test_import_query_reuses_hash(pg_repo_local):
    OUTPUT.init()
    base = OUTPUT.head
    # Run two imports: one importing all rows from `fruits` (will reuse the original `fruits` object),
    # one importing just the first row (new hash, won't be reused).
    ih_v1 = OUTPUT.import_tables(
        source_repository=pg_repo_local,
        source_tables=[
            "SELECT * FROM fruits", "SELECT * FROM fruits WHERE fruit_id = 1"
        ],
        tables=["fruits_all", "fruits_one"],
        do_checkout=False,
        table_queries=[True, True],
    )
    v1 = OUTPUT.images.by_hash(ih_v1)
    assert v1.get_table("fruits_all").objects == pg_repo_local.head.get_table(
        "fruits").objects
    assert (len(OUTPUT.objects.get_all_objects()) == 3
            )  # Original fruits and vegetables + the 1-row import

    # Run the same set of imports again: this time both query results already exist and will be reused.
    base.checkout()
    ih_v2 = OUTPUT.import_tables(
        source_repository=pg_repo_local,
        source_tables=[
            "SELECT * FROM fruits", "SELECT * FROM fruits WHERE fruit_id = 1"
        ],
        tables=["fruits_all", "fruits_one"],
        do_checkout=False,
        table_queries=[True, True],
    )
    v2 = OUTPUT.images.by_hash(ih_v2)
    assert v2.get_table("fruits_all").objects == v1.get_table(
        "fruits_all").objects
    assert v2.get_table("fruits_one").objects == v1.get_table(
        "fruits_one").objects
    assert len(OUTPUT.objects.get_all_objects()
               ) == 3  # No new objects have been created.
Example #29
def test_import_with_custom_query(pg_repo_local):
    # Test that importing with a custom query creates a new object
    pg_repo_local.run_sql("INSERT INTO fruits VALUES (3, 'mayonnaise');"
                          "INSERT INTO vegetables VALUES (3, 'oregano')")
    pg_repo_local.commit()

    all_current_objects = pg_repo_local.objects.get_all_objects()

    execute_commands(load_splitfile("import_with_custom_query.splitfile"),
                     output=OUTPUT)
    head = OUTPUT.head
    old_head = OUTPUT.images.by_hash(head.parent_id)

    # First two tables imported as new objects since they had a custom query, the other two get pointed
    # to the old pg_repo_local objects.
    tables = ["my_fruits", "o_vegetables", "vegetables", "all_fruits"]
    contents = [
        [(2, "orange")],
        [(1, "potato"), (3, "oregano")],
        [(1, "potato"), (2, "carrot"), (3, "oregano")],
        [(1, "apple"), (2, "orange"), (3, "mayonnaise")],
    ]

    old_head.checkout()
    engine = OUTPUT.engine
    for t in tables:
        assert not engine.table_exists(OUTPUT.to_schema(), t)

    head.checkout()
    for t, c in zip(tables, contents):
        assert sorted(OUTPUT.run_sql("SELECT * FROM %s" % t)) == sorted(c)

    for t in tables:
        objects = head.get_table(t).objects
        if t in ["my_fruits", "o_vegetables"]:
            assert all(o not in all_current_objects for o in objects)
        else:
            assert all(o in all_current_objects for o in objects)
Example #30
def test_advanced_splitfile(pg_repo_local):
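    # Run a splitfile with multiple imports (some query-based) and a join, checking which
    # tables exist in the old and new images.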
    execute_commands(
        load_splitfile("import_local_multiple_with_queries.splitfile"),
        output=OUTPUT)

    assert OUTPUT.engine.table_exists(OUTPUT.to_schema(), "my_fruits")
    assert OUTPUT.engine.table_exists(OUTPUT.to_schema(), "vegetables")
    assert not OUTPUT.engine.table_exists(OUTPUT.to_schema(), "fruits")
    assert OUTPUT.engine.table_exists(OUTPUT.to_schema(), "join_table")

    head = OUTPUT.head
    old_head = head.parent_id
    OUTPUT.images.by_hash(old_head).checkout()
    assert not OUTPUT.engine.table_exists(OUTPUT.to_schema(), "join_table")
    head.checkout()
    assert OUTPUT.run_sql("SELECT id, fruit, vegetable FROM join_table") == [
        (2, "orange", "carrot")
    ]
    assert OUTPUT.run_sql("SELECT * FROM my_fruits") == [(2, "orange")]