def test_splitfile_rebuild_update(local_engine_empty, pg_repo_remote_multitag):
    """Rebuilding output:latest against a newer source tag must reroot its
    provenance; rebuilding against unchanged sources must not create images."""
    runner = CliRunner()

    # Build output:latest from the splitfile, pinning test/pg_mount to tag v1.
    invocation = runner.invoke(
        build_c,
        [
            RESOURCES + "import_remote_multiple.splitfile",
            "-a",
            "TAG",
            "v1",
            "-o",
            "output",
        ],
    )
    assert invocation.exit_code == 0

    # Rerun the output:latest against v2 of the test/pg_mount
    invocation = runner.invoke(
        rebuild_c, ["output:latest", "--against", "test/pg_mount:v2"]
    )
    assert invocation.exit_code == 0
    head_after_rebuild = OUTPUT.head
    v2 = pg_repo_remote_multitag.images["v2"]
    assert head_after_rebuild.provenance() == [(pg_repo_remote_multitag, v2.image_hash)]

    # Now rerun the output:latest against the latest version of everything.
    # In this case, this should all resolve to the same version of test/pg_mount (v2)
    # and not produce any extra commits.
    images_before = OUTPUT.images()
    invocation = runner.invoke(rebuild_c, ["output:latest", "-u"])
    assert invocation.exit_code == 0
    assert head_after_rebuild == OUTPUT.head
    assert OUTPUT.images() == images_before
def test_splitfile_cached(pg_repo_local):
    """Re-executing an identical splitfile must be fully served from the image
    cache: the second run creates no new commits/snaps."""
    execute_commands(
        load_splitfile("import_local_multiple_with_queries.splitfile"), output=OUTPUT
    )
    first_run_images = OUTPUT.images()
    assert len(first_run_images) == 4

    # Second, identical run: the image set must be exactly the same.
    execute_commands(
        load_splitfile("import_local_multiple_with_queries.splitfile"), output=OUTPUT
    )
    assert OUTPUT.images() == first_run_images
def test_splitfile_sql_failure(local_engine_empty, pg_repo_remote_multitag):
    """A splitfile whose second stage fails must leave behind only the images
    that were built before the failing statement."""
    assert len(OUTPUT.images()) == 0
    assert _get_table_count(OUTPUT) == 0

    with pytest.raises(psycopg2.errors.UndefinedTable) as exc_info:
        execute_commands(
            load_splitfile("import_remote_broken_stage_2.splitfile"), output=OUTPUT
        )
    assert 'relation "nonexistent_fruits_table" does not exist' in str(exc_info.value)

    # Check the execution created the first dummy (000...) image and the second image
    # with IMPORT results
    assert len(OUTPUT.images()) == 2
    assert _get_table_count(OUTPUT) == 2
    assert sorted(OUTPUT.images["latest"].get_tables()) == ["my_fruits", "vegetables"]
def test_rollback_on_error(local_engine_empty):
    # For e.g. commit/checkout/other commands, we don't do commits/rollbacks
    # in the library itself and expect the caller to manage transactions. In CLI,
    # we need to make sure that erroneous transactions (e.g. interrupted SG commits)
    # are rolled back correctly instead of being committed.
    runner = CliRunner()

    OUTPUT.init()
    OUTPUT.run_sql(
        "CREATE TABLE test (key INTEGER PRIMARY KEY, value_1 VARCHAR, value_2 INTEGER)"
    )
    for row in range(11):
        OUTPUT.run_sql(
            "INSERT INTO test VALUES (%s, %s, %s)",
            (row + 1, chr(ord("a") + row), row * 2),
        )
    OUTPUT.commit(chunk_size=5, in_fragment_order={"test": ["key", "value_1"]})
    assert len(OUTPUT.images()) == 2
    assert len(OUTPUT.objects.get_all_objects()) == 3

    _alter_diff_splitting_dataset()
    OUTPUT.commit_engines()

    # Simulate the commit getting interrupted by the first object going through and being
    # recorded, then a KeyboardInterrupt being raised.
    seen_first_call = False

    def fail_after_first(*args, **kwargs):
        nonlocal seen_first_call
        if seen_first_call:
            raise BaseException("something went wrong")
        seen_first_call = True
        return FragmentManager._register_object(*args, **kwargs)

    with patch(
        "splitgraph.core.fragment_manager.FragmentManager._register_object",
        side_effect=fail_after_first,
    ) as register_mock:
        with pytest.raises(BaseException):
            runner.invoke(cli, ["commit", OUTPUT.to_schema()])

    # Check that no image/object metadata was written
    assert len(OUTPUT.images()) == 2
    assert len(OUTPUT.objects.get_all_objects()) == 3
    assert register_mock.call_count == 2

    # Check that the data in the audit trigger wasn't deleted
    assert len(OUTPUT.engine.get_pending_changes(OUTPUT.to_schema(), table="test")) == 6
def test_splitfile_object_download_failure(local_engine_empty, pg_repo_remote_multitag):
    # Simulate an object download failure (that happens inside of the engine during IMPORT
    # execution) propagating to the caller and not leaving the engine in an inconsistent state.
    object_id = pg_repo_remote_multitag.images["v1"].get_table("fruits").objects[0]
    assert object_id == "o0e742bd2ea4927f5193a2c68f8d4c51ea018b1ef3e3005a50727147d2cf57b"

    # Hide the object by renaming its physical table out of the way.
    placeholder_id = "o" + "0" * 62
    pg_repo_remote_multitag.engine.run_sql(
        SQL("ALTER TABLE splitgraph_meta.{} RENAME TO {}").format(
            Identifier(object_id), Identifier(placeholder_id)
        )
    )

    assert len(OUTPUT.images()) == 0
    assert _get_table_count(OUTPUT) == 0

    with pytest.raises(ObjectCacheError) as exc_info:
        execute_commands(
            load_splitfile("import_remote_multiple.splitfile"),
            params={"TAG": "v1"},
            output=OUTPUT,
        )
    assert "Missing 1 object (%s)" % object_id in str(exc_info.value)

    # Check the execution didn't create the image
    assert len(OUTPUT.images()) == 0
    assert _get_table_count(OUTPUT) == 0

    # Rename the object back and retry the Splitfile
    pg_repo_remote_multitag.engine.run_sql(
        SQL("ALTER TABLE splitgraph_meta.{} RENAME TO {}").format(
            Identifier(placeholder_id), Identifier(object_id)
        )
    )
    execute_commands(
        load_splitfile("import_remote_multiple.splitfile"),
        params={"TAG": "v1"},
        output=OUTPUT,
    )

    OUTPUT.head.checkout()
    assert OUTPUT.run_sql("SELECT id, fruit, vegetable FROM join_table") == [
        (1, "apple", "potato"),
        (2, "orange", "carrot"),
    ]
    assert len(OUTPUT.images()) == 3
    # 2 tables in the first non-empty image, 3 tables in the second image
    # (previous 2 + joined data).
    assert _get_table_count(OUTPUT) == 5
def test_provenance_inline_sql(readonly_pg_repo, pg_repo_local):
    """Inline-SQL splitfiles must record provenance against both source images,
    regenerate the original SQL via to_splitfile(), reuse cached images on an
    unchanged rebuild, and pick up new data when a source changes."""
    prepare_lq_repo(pg_repo_local, commit_after_every=False, include_pk=True)
    pg_repo_local.head.tag("v2")

    execute_commands(load_splitfile("inline_sql.splitfile"), output=OUTPUT)

    head = OUTPUT.head
    remote_src = readonly_pg_repo.images["latest"]
    local_src = pg_repo_local.images["latest"]

    # Forward provenance: the new image points at both inputs.
    assert set(head.provenance()) == {
        (readonly_pg_repo, remote_src.image_hash),
        (pg_repo_local, local_src.image_hash),
    }

    # Reverse provenance: each input points back at the output image.
    for source in (remote_src, local_src):
        assert source.provenance(reverse=True, engine=OUTPUT.engine) == [
            (OUTPUT, OUTPUT.head.image_hash)
        ]

    expected_sql = (
        "SQL {{CREATE TABLE balanced_diet\n"
        " AS SELECT fruits.fruit_id AS id\n"
        " , fruits.name AS fruit\n"
        " , my_fruits.timestamp AS timestamp\n"
        " , vegetables.name AS vegetable\n"
        " FROM "
        '"otheruser/pg_mount:{0}".fruits AS '
        "fruits\n"
        " INNER JOIN "
        '"otheruser/pg_mount:{0}".vegetables '
        "AS vegetables ON fruits.fruit_id = vegetable_id\n"
        " LEFT JOIN "
        '"test/pg_mount:{1}".fruits AS '
        "my_fruits ON my_fruits.fruit_id = fruits.fruit_id;\n"
        "\n"
        "ALTER TABLE balanced_diet ADD PRIMARY KEY (id)}}"
    ).format(remote_src.image_hash, local_src.image_hash)

    assert head.to_splitfile() == [expected_sql]

    # Source replacement swaps the pinned hashes for the given tags in the SQL.
    assert head.to_splitfile(
        source_replacement={
            pg_repo_local: "new_local_tag",
            readonly_pg_repo: "new_remote_tag",
        }
    ) == [
        expected_sql.replace(remote_src.image_hash, "new_remote_tag").replace(
            local_src.image_hash, "new_local_tag"
        )
    ]

    assert len(OUTPUT.images()) == 2

    # Try rerunning the Splitfile against the same original data (check caching)
    rebuild_image(
        OUTPUT.head,
        source_replacement={pg_repo_local: "latest", readonly_pg_repo: "latest"},
    )
    assert len(OUTPUT.images()) == 2

    # Change pg_repo_local and rerun the Splitfile against it.
    pg_repo_local.run_sql(
        "UPDATE fruits SET timestamp = '2020-01-01 12:00:00' WHERE fruit_id = 2"
    )
    changed_head = pg_repo_local.commit()
    rebuild_image(
        OUTPUT.head,
        source_replacement={
            pg_repo_local: changed_head.image_hash,
            readonly_pg_repo: "latest",
        },
    )
    assert len(OUTPUT.images()) == 3
    assert OUTPUT.run_sql("SELECT * FROM balanced_diet") == [
        (1, "apple", None, "potato"),
        (2, "orange", datetime.datetime(2020, 1, 1, 12, 0), "carrot"),
    ]