Exemplo n.º 1
0
def sql_c(remote, show_all, json, query):
    """
    Run SQL on or connect to the Splitgraph Data Delivery Network.

    If a query isn't passed, this will return a libpq-compatible connection string to
    the registry's SQL endpoint. It can be used to connect to the endpoint with other SQL clients:

    ```
    pgcli $(sgr cloud sql)
    ```

    If a query is passed, this will run an SQL query against the SQL endpoint.
    """
    ddn_params = _get_ddn_conn_params(remote)
    from splitgraph.engine.postgres.engine import get_conn_str, PostgresEngine

    if not query:
        click.echo(get_conn_str(ddn_params))
        return

    # Build an engine to connect to the DDN, disable pre-flight API checks etc
    engine = PostgresEngine(name=remote,
                            conn_params=ddn_params,
                            registry=False,
                            check_version=False)

    try:
        results = engine.run_sql(query)
        emit_sql_results(results, use_json=json, show_all=show_all)
    finally:
        engine.close()
Exemplo n.º 2
0
def _make_changeset(
    engine: PostgresEngine,
    old_schema: str,
    old_table: str,
    schema: str,
    table: str,
    schema_spec: TableSchema,
    upsert_condition: str = "TRUE",
) -> Changeset:
    """Build a fake changeset from the temporary table and the existing table to pass
    to the object manager (store as a Splitgraph diff)."""

    # PK -> (upserted / deleted, old row, new row)
    # As a memory-saving hack, we only record the values of the old row (read from the
    # current table) -- this is because object creation routines read the inserted rows
    # from the staging table anyway.
    change_key = [c for c, _ in get_change_key(schema_spec)]
    # Query:
    # SELECT (new, pk, columns) AS pk,
    #        (custom upsert condition),
    #        (row_to_json(old non-pk cols)) AS old_row
    # FROM new_table n LEFT OUTER JOIN old_table o ON [o.pk = n.pk]
    # WHERE old row != new row
    query = (
        SQL("SELECT ") +
        SQL(",").join(SQL("n.") + Identifier(c) for c in change_key) +
        SQL(",") + SQL(upsert_condition + " AS upserted, ")
        # If PK doesn't exist in the new table, old_row is null, else output it
        + SQL("CASE WHEN ") + SQL(" AND ").join(
            SQL("o.{0} IS NULL").format(Identifier(c)) for c in change_key) +
        SQL(" THEN '{}'::json ELSE json_build_object(") + SQL(",").join(
            SQL("%s, o.") + Identifier(c.name)
            for c in schema_spec if c.name not in change_key) +
        SQL(") END AS old_row FROM {}.{} n LEFT OUTER JOIN {}.{} o ON "
            ).format(
                Identifier(schema),
                Identifier(table),
                Identifier(old_schema),
                Identifier(old_table),
            ) + SQL(" AND ").join(
                SQL("o.{0} = n.{0}").format(Identifier(c))
                for c in change_key) +
        SQL("WHERE o.* IS DISTINCT FROM n.*")).as_string(engine.connection)
    args = [c.name for c in schema_spec if c.name not in change_key]
    result = engine.run_sql(query, args)
    return {tuple(row[:-2]): (row[-2], row[-1], {}) for row in result}
Exemplo n.º 3
0
def _make_changeset(
    engine: PostgresEngine,
    schema: str,
    table: str,
    schema_spec: TableSchema,
    upsert_condition: str = "TRUE",
) -> Changeset:
    """Build a fake changeset from the temporary table and the existing table to pass
    to the object manager (store as a Splitgraph diff)."""

    # PK -> (upserted / deleted, old row, new row)
    # We don't find out the old row here. This is because it requires a JOIN on the current
    # Splitgraph table, so if we're adding e.g. 100k rows to a 1M row table, it's going to cause big
    # performance issues. Instead, we pretend that all rows
    # have been inserted (apart from the ones that have been deleted by having the magic
    # _sdc_deleted_at column).

    # We also don't care about finding out the new row here, as the storage routine queries
    # the table directly to get those values.

    # The tradeoff is that now, when querying the table, we need to include not only fragments
    # whose index matches the query, but also all fragments that might overwrite those fragments
    # (through PK range overlap). Since we don't record old row values in this changeset's index,
    # we can no longer find if a fragment deletes some row by inspecting the index -- we need to
    # use PK ranges to find out overlapping fragments.

    change_key = [c for c, _ in get_change_key(schema_spec)]
    # Query:
    # SELECT (col_1, col_2, ...) AS pk,
    #        (custom upsert condition),
    #        {} AS old_row
    # FROM new_table n
    query = (SQL("SELECT ") +
             SQL(",").join(SQL("n.") + Identifier(c)
                           for c in change_key) + SQL(",") +
             SQL(upsert_condition + " AS upserted FROM {}.{} n").format(
                 Identifier(schema), Identifier(table))).as_string(
                     engine.connection)
    result = engine.run_sql(query)
    return {tuple(row[:-1]): (row[-1], {}, {}) for row in result}
Exemplo n.º 4
0
def test_disjoint_table_lq_temp_table_deletion_doesnt_lock_up(pg_repo_local):
    # When Multicorn reads from the temporary table, it does that in the context of the
    # transaction that it's been called from. It hence can hold a read lock on the
    # temporary table that it won't release until the scan is over but the scan won't
    # be over until we've managed to delete the temporary table, leading to a deadlock.
    # Check deleting the table in a separate thread works.

    prepare_lq_repo(pg_repo_local, commit_after_every=True, include_pk=True)
    pg_repo_local.run_sql(
        "INSERT INTO fruits VALUES (4, 'fruit_4'), (5, 'fruit_5')")
    fruits = pg_repo_local.commit().get_table("fruits")

    tables, callback, plan = fruits.query_indirect(
        columns=["fruit_id", "name"], quals=None)

    # Force a materialization and get the query that makes us read from the temporary table.
    last_table = list(tables)[-1]
    pg_repo_local.commit_engines()

    # Simulate Multicorn reading from the table by doing it from a different engine.
    conn_params = _prepare_engine_config(CONFIG)
    engine = PostgresEngine(conn_params=conn_params, name="test_engine")
    query = _generate_select_query(engine, last_table, ["fruit_id", "name"])
    engine.run_sql(query)
    logging.info("Acquired read lock")

    # At this point, we're holding a lock on the table and callback will lock up, unless it's run
    # in a separate thread.
    callback(from_fdw=True)

    # Wait for the thread to actually start (a bit gauche but otherwise makes the test flaky(ier))
    time.sleep(1)

    # We still can read from the table (callback spawned a thread which is locked trying to delete it).
    engine.run_sql(query)

    # Now drop the lock.
    logging.info("Dropping the lock")
    engine.rollback()

    # If this test assertion fails, make sure we're not left holding the lock.
    engine.autocommit = True
    with pytest.raises(ObjectNotFoundError):
        engine.run_sql(query)
Exemplo n.º 5
0
def test_commandline_engine_creation_list_stop_deletion(teardown_test_engine):
    runner = CliRunner()
    client = docker.from_env()

    # Create an engine with default password and wait for it to initialize
    result = runner.invoke(
        add_engine_c,
        [
            "--image",
            _get_test_engine_image(),
            "--no-pull",
            "--port",
            "5428",
            "--username",
            "not_sgr",
            "--no-sgconfig",
            TEST_ENGINE_NAME,
        ],
        input="notsosecure\nnotsosecure\n",
    )
    assert result.exit_code == 0

    # Connect to the engine to check that it's up
    conn_params = {
        "SG_ENGINE_HOST": "localhost",
        "SG_ENGINE_PORT": "5428",
        "SG_ENGINE_USER": "******",
        "SG_ENGINE_PWD": "notsosecure",
        "SG_ENGINE_DB_NAME": "splitgraph",
        "SG_ENGINE_POSTGRES_DB_NAME": "postgres",
        "SG_ENGINE_ADMIN_USER": "******",
        "SG_ENGINE_ADMIN_PWD": "notsosecure",
    }

    engine = PostgresEngine(name="test", conn_params=conn_params)
    assert engine.run_sql("SELECT * FROM splitgraph_meta.images") == []
    engine.close()

    # List running engines
    result = runner.invoke(list_engines_c)
    assert result.exit_code == 0
    assert TEST_ENGINE_NAME in result.stdout
    assert "running" in result.stdout

    # Check engine version
    # (we didn't put it into the .sgconfig so have to patch instead)
    with patch("splitgraph.engine.get_engine", return_value=engine):
        result = runner.invoke(version_engine_c, [TEST_ENGINE_NAME])
        assert result.exit_code == 0
        assert __version__ in result.stdout

    # Get engine logs (no --follow since we won't be able to interrupt it)
    result = runner.invoke(log_engine_c, [TEST_ENGINE_NAME])
    assert "database system is ready to accept connections" in result.stdout

    # Try deleting the engine while it's still running
    with pytest.raises(docker.errors.APIError):
        runner.invoke(delete_engine_c, ["-y", TEST_ENGINE_NAME],
                      catch_exceptions=False)

    # Stop the engine
    result = runner.invoke(stop_engine_c, [TEST_ENGINE_NAME])
    assert result.exit_code == 0

    # Check it's not running
    for c in client.containers.list(filters={"ancestor": "splitgraph/engine"},
                                    all=False):
        assert c.name != "splitgraph_test_engine_" + TEST_ENGINE_NAME

    result = runner.invoke(list_engines_c)
    assert TEST_ENGINE_NAME not in result.stdout

    result = runner.invoke(list_engines_c, ["-a"])
    assert TEST_ENGINE_NAME in result.stdout

    # Bring it back up
    result = runner.invoke(start_engine_c, [TEST_ENGINE_NAME])
    assert result.exit_code == 0

    # Check it's running
    result = runner.invoke(list_engines_c)
    assert result.exit_code == 0
    assert TEST_ENGINE_NAME in result.stdout
    assert "running" in result.stdout

    # Try upgrading it to the same engine version as a smoke test
    with patch("splitgraph.engine.get_engine", return_value=engine):
        # Make sure the connection is closed as the client will use this Engine reference
        # after the upgrade to initialize it.
        engine.close()

        result = runner.invoke(
            upgrade_engine_c,
            [
                TEST_ENGINE_NAME, "--image",
                _get_test_engine_image(), "--no-pull"
            ],
            catch_exceptions=False,
        )
        assert result.exit_code == 0
        assert "Upgraded engine %s to %s" % (TEST_ENGINE_NAME,
                                             __version__) in result.stdout

        # Check the engine is running and has the right version
        result = runner.invoke(list_engines_c)
        assert result.exit_code == 0
        assert TEST_ENGINE_NAME in result.stdout
        assert "running" in result.stdout

        result = runner.invoke(version_engine_c, [TEST_ENGINE_NAME])
        assert result.exit_code == 0
        assert __version__ in result.stdout

    # Force delete it
    result = runner.invoke(delete_engine_c,
                           ["-f", "--with-volumes", TEST_ENGINE_NAME],
                           input="y\n")
    assert result.exit_code == 0

    # Check the engine (and the volumes) are gone
    for c in client.containers.list(filters={"ancestor": "splitgraph/engine"},
                                    all=False):
        assert c.name != "splitgraph_test_engine_" + TEST_ENGINE_NAME
    for v in client.volumes.list():
        assert not v.name.startswith("splitgraph_test_engine_" +
                                     TEST_ENGINE_NAME)