Example No. 1
def test_createdb(zenith_simple_env: ZenithEnv):
    env = zenith_simple_env
    env.zenith_cli.create_branch('test_createdb', 'empty')

    pg = env.postgres.create_start('test_createdb')
    log.info("postgres is running on 'test_createdb' branch")

    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            # Cause a 'relmapper' change in the original branch
            cur.execute('VACUUM FULL pg_class')

            cur.execute('CREATE DATABASE foodb')

            cur.execute('SELECT pg_current_wal_insert_lsn()')
            lsn = cur.fetchone()[0]

    # Create a branch
    env.zenith_cli.create_branch('test_createdb2',
                                 'test_createdb',
                                 ancestor_start_lsn=lsn)
    pg2 = env.postgres.create_start('test_createdb2')

    # Test that you can connect to the new database on both branches
    for db in (pg, pg2):
        db.connect(dbname='foodb').close()
Example No. 2
def test_seqscans(zenith_with_baseline: PgCompare, rows: int, iters: int, workers: int):
    env = zenith_with_baseline

    with closing(env.pg.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute('create table t (i integer);')
            cur.execute(f'insert into t values (generate_series(1,{rows}));')

            # Verify that the table is larger than shared_buffers
            cur.execute('''
            select setting::int * pg_size_bytes(unit) as shared_buffers, pg_relation_size('t') as tbl_size
            from pg_settings where name = 'shared_buffers'
            ''')
            row = cur.fetchone()
            shared_buffers = row[0]
            table_size = row[1]
            log.info(f"shared_buffers is {shared_buffers}, table size {table_size}")
            assert int(shared_buffers) < int(table_size)
            env.zenbenchmark.record("table_size", table_size, 'bytes', MetricReport.TEST_PARAM)

            cur.execute(f"set max_parallel_workers_per_gather = {workers}")

            with env.record_duration('run'):
                for i in range(iters):
                    cur.execute('select count(*) from t;')
Example No. 3
def test_subxacts(zenith_simple_env: ZenithEnv, test_output_dir):
    env = zenith_simple_env
    env.zenith_cli.create_branch("test_subxacts", "empty")
    pg = env.postgres.create_start('test_subxacts')

    log.info("postgres is running on 'test_subxacts' branch")
    pg_conn = pg.connect()
    cur = pg_conn.cursor()

    cur.execute('''
        CREATE TABLE t1(i int, j int);
    ''')

    cur.execute('select pg_switch_wal();')

    # Issue 100 transactions, with 1000 subtransactions in each.
    for i in range(100):
        cur.execute('begin')
        for j in range(1000):
            cur.execute(f'savepoint sp{j}')
            cur.execute(f'insert into t1 values ({i}, {j})')
        cur.execute('commit')

    # force wal flush
    cur.execute('checkpoint')

    # Check that we can restore the content of the datadir correctly
    check_restored_datadir_content(test_output_dir, env, pg)
Example No. 4
def kill_safekeeper(self, sk_dir):
    """Read pid file and kill process"""
    pid_file = os.path.join(sk_dir, "safekeeper.pid")
    with open(pid_file, "r") as f:
        pid = int(f.read())
        log.info(f"Killing safekeeper with pid {pid}")
        os.kill(pid, signal.SIGKILL)
Example No. 5
def wait_for_pageserver_catchup(pgmain: Postgres,
                                polling_interval=1,
                                timeout=60):
    started_at = time.time()

    received_lsn_lag = 1
    while received_lsn_lag > 0:
        elapsed = time.time() - started_at
        if elapsed > timeout:
            raise RuntimeError(
                f"timed out waiting for pageserver to reach pg_current_wal_flush_lsn()"
            )

        with closing(pgmain.connect()) as conn:
            with conn.cursor() as cur:

                cur.execute('''
                    select  pg_size_pretty(pg_cluster_size()),
                    pg_wal_lsn_diff(pg_current_wal_flush_lsn(),received_lsn) as received_lsn_lag
                    FROM backpressure_lsns();
                ''')
                res = cur.fetchone()
                log.info(
                    f"pg_cluster_size = {res[0]}, received_lsn_lag = {res[1]}")
                received_lsn_lag = res[1]

        time.sleep(polling_interval)
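
A minimal usage sketch (not part of the original suite): it assumes a running compute node called pgmain and an existing table t. Generate some WAL, then block until the pageserver has ingested it before taking any measurements.

# Hypothetical usage of wait_for_pageserver_catchup(); names are illustrative.
with closing(pgmain.connect()) as conn:
    with conn.cursor() as cur:
        cur.execute("INSERT INTO t SELECT generate_series(1, 100000)")

wait_for_pageserver_catchup(pgmain, polling_interval=1, timeout=120)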
Example No. 6
def test_gc_aggressive(zenith_simple_env: ZenithEnv):
    env = zenith_simple_env
    env.zenith_cli.create_branch("test_gc_aggressive", "empty")
    pg = env.postgres.create_start('test_gc_aggressive')
    log.info('postgres is running on test_gc_aggressive branch')

    conn = pg.connect()
    cur = conn.cursor()

    cur.execute("SHOW zenith.zenith_timeline")
    timeline = cur.fetchone()[0]

    # Create table, and insert the first 100 rows
    cur.execute('CREATE TABLE foo (id int, counter int, t text)')
    cur.execute(f'''
        INSERT INTO foo
            SELECT g, 0, 'long string to consume some space' || g
            FROM generate_series(1, {num_rows}) g
    ''')
    cur.execute('CREATE INDEX ON foo(id)')

    asyncio.run(update_and_gc(env, pg, timeline))

    cur.execute('SELECT COUNT(*), SUM(counter) FROM foo')
    assert cur.fetchone() == (num_rows, updates_to_perform)
Example No. 7
def load(pg: Postgres, stop_event: threading.Event,
         load_ok_event: threading.Event):
    log.info("load started")

    inserted_ctr = 0
    failed = False
    while not stop_event.is_set():
        try:
            with pg_cur(pg) as cur:
                cur.execute("INSERT INTO load VALUES ('some payload')")
                inserted_ctr += 1
        except:
            if not failed:
                log.info("load failed")
            failed = True
            load_ok_event.clear()
        else:
            if failed:
                with pg_cur(pg) as cur:
                    # if we recovered after a failure, verify that we have the correct number of rows
                    log.info("recovering at %s", inserted_ctr)
                    cur.execute("SELECT count(*) FROM load")
                    # it seems that sometimes the transaction gets committed before we can acknowledge
                    # the result, so the selected value can be larger by one than we expect
                    assert cur.fetchone()[0] - inserted_ctr <= 1
                    log.info("successfully recovered %s", inserted_ctr)
                    failed = False
                    load_ok_event.set()
    log.info('load thread stopped')
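
A sketch of how a test might drive this helper (assumed wiring, not taken verbatim from the original tests): run load() in a background thread while a failure is injected, then signal it to stop and check that it recovered.

# Hypothetical wiring of the load() helper; pg is a running Postgres fixture.
stop_event = threading.Event()
load_ok_event = threading.Event()
load_ok_event.set()
load_thread = threading.Thread(target=load, args=(pg, stop_event, load_ok_event))
load_thread.start()

# ... restart safekeepers or the pageserver here to inject a failure ...

stop_event.set()   # ask the loop in load() to exit
load_thread.join()
assert load_ok_event.is_set(), "load did not recover after the failure"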
Example No. 8
def test_createuser(zenith_simple_env: ZenithEnv):
    env = zenith_simple_env
    env.zenith_cli.create_branch('test_createuser', 'empty')
    pg = env.postgres.create_start('test_createuser')
    log.info("postgres is running on 'test_createuser' branch")

    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            # Cause a 'relmapper' change in the original branch
            cur.execute('CREATE USER testuser with password %s', ('testpwd', ))

            cur.execute('CHECKPOINT')

            cur.execute('SELECT pg_current_wal_insert_lsn()')
            lsn = cur.fetchone()[0]

    # Create a branch
    env.zenith_cli.create_branch('test_createuser2',
                                 'test_createuser',
                                 ancestor_start_lsn=lsn)
    pg2 = env.postgres.create_start('test_createuser2')

    # Test that you can connect to new branch as a new user
    assert pg2.safe_psql('select current_user',
                         user='******') == [('testuser', )]
Example No. 9
def test_backpressure_received_lsn_lag(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()
    # Create a branch for us
    env.zenith_cli.create_branch('test_backpressure')

    pg = env.postgres.create_start(
        'test_backpressure', config_lines=['max_replication_write_lag=30MB'])
    log.info("postgres is running on 'test_backpressure' branch")

    # setup check thread
    check_stop_event = threading.Event()
    check_thread = threading.Thread(target=check_backpressure,
                                    args=(pg, check_stop_event))
    check_thread.start()

    # Configure failpoint to slow down walreceiver ingest
    with closing(env.pageserver.connect()) as psconn:
        with psconn.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur:
            pscur.execute("failpoints walreceiver-after-ingest=sleep(20)")

    # FIXME
    # Wait for the check thread to start
    #
    # If the load starts too soon, the check thread cannot authenticate:
    # it cannot connect to the database because of the lag, while waiting
    # for the replay LSN to arrive.
    time.sleep(2)

    with pg_cur(pg) as cur:
        # Create and initialize test table
        cur.execute("CREATE TABLE foo(x bigint)")

        inserts_to_do = 2000000
        rows_inserted = 0

        while check_thread.is_alive() and rows_inserted < inserts_to_do:
            try:
                cur.execute(
                    "INSERT INTO foo select from generate_series(1, 100000)")
                rows_inserted += 100000
            except Exception as e:
                if check_thread.is_alive():
                    log.info('stopping check thread')
                    check_stop_event.set()
                    check_thread.join()
                    assert False, f"Exception {e} while inserting rows, but WAL lag is within configured threshold. That means backpressure is not tuned properly"
                else:
                    assert False, f"Exception {e} while inserting rows and WAL lag overflowed configured threshold. That means backpressure doesn't work."

        log.info(f"inserted {rows_inserted} rows")

    if check_thread.is_alive():
        log.info('stopping check thread')
        check_stop_event.set()
        check_thread.join()
        log.info('check thread stopped')
    else:
        assert False, "WAL lag overflowed configured threshold. That means backpressure doesn't work."
Example No. 10
def show_statuses(safekeepers: List[Safekeeper], tenant_id: str,
                  timeline_id: str):
    for sk in safekeepers:
        http_cli = sk.http_client()
        try:
            status = http_cli.timeline_status(tenant_id, timeline_id)
            log.info(f"Safekeeper {sk.id} status: {status}")
        except Exception as e:
            log.info(f"Safekeeper {sk.id} status error: {e}")
Example No. 11
    def __exit__(self, exc_type, exc_value, traceback):
        log.info('Cleaning up all safekeeper and compute nodes')

        # Stop all the nodes
        if self.postgres is not None:
            self.postgres.stop()
        if self.safekeepers is not None:
            for sk_proc in self.safekeepers:
                self.kill_safekeeper(sk_proc.args[6])
Example No. 12
def test_broken(zenith_simple_env: ZenithEnv, pg_bin):
    env = zenith_simple_env

    env.zenith_cli.create_branch("test_broken", "empty")
    env.postgres.create_start("test_broken")
    log.info('postgres is running')

    log.info('THIS NEXT COMMAND WILL FAIL:')
    pg_bin.run('pgbench -i_am_a_broken_test'.split())
Example No. 13
def test_wal_deleted_after_broadcast(zenith_env_builder: ZenithEnvBuilder):
    # used to calculate delta in collect_stats
    last_lsn = 0.0

    # returns LSN and pg_wal size, all in MB
    def collect_stats(pg: Postgres, cur, enable_logs=True):
        nonlocal last_lsn
        assert pg.pgdata_dir is not None

        log.info('collecting LSN and pg_wal size stats')
        cur.execute("select pg_current_wal_lsn()")
        current_lsn = lsn_from_hex(cur.fetchone()[0]) / 1024 / 1024
        pg_wal_size = get_dir_size(os.path.join(pg.pgdata_dir,
                                                'pg_wal')) / 1024 / 1024
        if enable_logs:
            log.info(
                f"LSN delta: {current_lsn - last_lsn} MB, current WAL size: {pg_wal_size} MB"
            )
        last_lsn = current_lsn
        return current_lsn, pg_wal_size

    # generates roughly 20MB of WAL, enough to create at least one new segment
    def generate_wal(cur):
        cur.execute(
            "INSERT INTO t SELECT generate_series(1,300000), 'payload'")

    zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_wal_deleted_after_broadcast')
    # Adjust checkpoint config to prevent keeping old WAL segments
    pg = env.postgres.create_start('test_wal_deleted_after_broadcast',
                                   config_lines=[
                                       'min_wal_size=32MB',
                                       'max_wal_size=32MB',
                                       'log_checkpoints=on'
                                   ])

    pg_conn = pg.connect()
    cur = pg_conn.cursor()
    cur.execute('CREATE TABLE t(key int, value text)')

    collect_stats(pg, cur)

    # generate WAL to simulate normal workload
    for i in range(5):
        generate_wal(cur)
        collect_stats(pg, cur)

    log.info('executing checkpoint')
    cur.execute('CHECKPOINT')
    wal_size_after_checkpoint = collect_stats(pg, cur)[1]

    # there shouldn't be more than 2 WAL segments (but dir may have archive_status files)
    assert wal_size_after_checkpoint < 16 * 2.5
Example No. 14
    def collect_metrics(message: str) -> List[TimelineMetrics]:
        with env.pageserver.http_client() as pageserver_http:
            timeline_details = [
                pageserver_http.timeline_detail(
                    tenant_id=tenant_id,
                    timeline_id=branch_names_to_timeline_ids[branch_name])
                for branch_name in branch_names
            ]
        # All changes visible to the pageserver (last_record_lsn) should be
        # confirmed by safekeepers first. As we cannot atomically get the
        # state of both the pageserver and the safekeepers, we should start
        # with the pageserver: looking at outdated data from the pageserver
        # is ok. Asking the safekeepers first is not ok, because new commits
        # may arrive at both the safekeepers and the pageserver after we have
        # already obtained the safekeepers' state, which would look
        # contradictory.
        sk_metrics = [sk.http_client().get_metrics() for sk in env.safekeepers]

        timeline_metrics = []
        for timeline_detail in timeline_details:
            timeline_id: str = timeline_detail["timeline_id"]

            local_timeline_detail = timeline_detail.get('local')
            if local_timeline_detail is None:
                log.debug(
                    f"Timeline {timeline_id} is not present locally, skipping")
                continue

            m = TimelineMetrics(
                timeline_id=timeline_id,
                last_record_lsn=lsn_from_hex(
                    local_timeline_detail['last_record_lsn']),
            )
            for sk_m in sk_metrics:
                m.flush_lsns.append(sk_m.flush_lsn_inexact[(tenant_id.hex,
                                                            timeline_id)])
                m.commit_lsns.append(sk_m.commit_lsn_inexact[(tenant_id.hex,
                                                              timeline_id)])

            for flush_lsn, commit_lsn in zip(m.flush_lsns, m.commit_lsns):
                # Invariant. May be < when transaction is in progress.
                assert commit_lsn <= flush_lsn, f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
            # We only call collect_metrics() after a transaction is confirmed by
            # the compute node, which only happens after a consensus of safekeepers
            # has confirmed the transaction. We assume majority consensus here.
            assert (
                2 * sum(m.last_record_lsn <= lsn for lsn in m.flush_lsns) >
                zenith_env_builder.num_safekeepers
            ), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
            assert (
                2 * sum(m.last_record_lsn <= lsn for lsn in m.commit_lsns) >
                zenith_env_builder.num_safekeepers
            ), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
            timeline_metrics.append(m)
        log.info(f"{message}: {timeline_metrics}")
        return timeline_metrics
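
For example, with zenith_env_builder.num_safekeepers = 3 the assertion 2 * count > 3 only holds for count >= 2: at least two of the three safekeepers must report flush_lsn (and commit_lsn) at or beyond the pageserver's last_record_lsn, which is exactly the majority-consensus assumption stated in the comment above.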
Example No. 15
def test_parallel_copy(zenith_simple_env: ZenithEnv, n_parallel=5):
    env = zenith_simple_env
    env.zenith_cli.create_branch("test_parallel_copy", "empty")
    pg = env.postgres.create_start('test_parallel_copy')
    log.info("postgres is running on 'test_parallel_copy' branch")

    # Create test table
    conn = pg.connect()
    cur = conn.cursor()
    cur.execute(f'CREATE TABLE copytest (i int, t text)')

    # Load the table with COPY, using parallel connections.
    asyncio.run(parallel_load_same_table(pg, n_parallel))
Example No. 16
def test_pageserver_restart(zenith_env_builder: ZenithEnvBuilder):
    # One safekeeper is enough for this test.
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_pageserver_restart')
    pg = env.postgres.create_start('test_pageserver_restart')

    pg_conn = pg.connect()
    cur = pg_conn.cursor()

    # Create table, and insert some rows. Make it big enough that it doesn't fit in
    # shared_buffers, otherwise the SELECT after restart will just return answer
    # from shared_buffers without hitting the page server, which defeats the point
    # of this test.
    cur.execute('CREATE TABLE foo (t text)')
    cur.execute('''
        INSERT INTO foo
            SELECT 'long string to consume some space' || g
            FROM generate_series(1, 100000) g
    ''')

    # Verify that the table is larger than shared_buffers
    cur.execute('''
        select setting::int * pg_size_bytes(unit) as shared_buffers, pg_relation_size('foo') as tbl_size
        from pg_settings where name = 'shared_buffers'
    ''')
    row = cur.fetchone()
    log.info(f"shared_buffers is {row[0]}, table size {row[1]}")
    assert int(row[0]) < int(row[1])

    # Stop and restart pageserver. This is a more or less graceful shutdown, although
    # the page server doesn't currently have a shutdown routine so there's no difference
    # between stopping and crashing.
    env.pageserver.stop()
    env.pageserver.start()

    # Stopping the pageserver breaks the connection from the postgres backend to
    # the page server, and causes the next query on the connection to fail. Start a new
    # postgres connection too, to avoid that error. (Ideally, the compute node would
    # handle that and retry internally, without propagating the error to the user, but
    # currently it doesn't...)
    pg_conn = pg.connect()
    cur = pg_conn.cursor()

    cur.execute("SELECT count(*) FROM foo")
    assert cur.fetchone() == (100000, )

    # Stop the page server by force, and restart it
    env.pageserver.stop()
    env.pageserver.start()
Example No. 17
def test_zenith_regress(zenith_simple_env: ZenithEnv, test_output_dir, pg_bin, capsys):
    env = zenith_simple_env

    env.zenith_cli.create_branch("test_zenith_regress", "empty")
    # Connect to postgres and create a database called "regression".
    pg = env.postgres.create_start('test_zenith_regress')
    pg.safe_psql('CREATE DATABASE regression')

    # Create some local directories for pg_regress to run in.
    runpath = os.path.join(test_output_dir, 'regress')
    mkdir_if_needed(runpath)
    mkdir_if_needed(os.path.join(runpath, 'testtablespace'))

    # Compute all the file locations that pg_regress will need.
    # This test runs zenith specific tests
    build_path = os.path.join(pg_distrib_dir, 'build/src/test/regress')
    src_path = os.path.join(base_dir, 'test_runner/zenith_regress')
    bindir = os.path.join(pg_distrib_dir, 'bin')
    schedule = os.path.join(src_path, 'parallel_schedule')
    pg_regress = os.path.join(build_path, 'pg_regress')

    pg_regress_command = [
        pg_regress,
        '--use-existing',
        '--bindir={}'.format(bindir),
        '--dlpath={}'.format(build_path),
        '--schedule={}'.format(schedule),
        '--inputdir={}'.format(src_path),
    ]

    log.info(pg_regress_command)
    env_vars = {
        'PGPORT': str(pg.default_options['port']),
        'PGUSER': pg.default_options['user'],
        'PGHOST': pg.default_options['host'],
    }

    # Run the command.
    # We don't capture the output. It's not too chatty, and it always
    # logs the exact same data to `regression.out` anyway.
    with capsys.disabled():
        pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath)

        # checkpoint one more time to ensure that the lsn we get is the latest one
        pg.safe_psql('CHECKPOINT')
        lsn = pg.safe_psql('select pg_current_wal_insert_lsn()')[0][0]

        # Check that we restore the content of the datadir correctly
        check_restored_datadir_content(test_output_dir, env, pg)
Example No. 18
    def collect_stats(pg: Postgres, cur, enable_logs=True):
        nonlocal last_lsn
        assert pg.pgdata_dir is not None

        log.info('collecting LSN and pg_wal size stats')
        cur.execute("select pg_current_wal_lsn()")
        current_lsn = lsn_from_hex(cur.fetchone()[0]) / 1024 / 1024
        pg_wal_size = get_dir_size(os.path.join(pg.pgdata_dir,
                                                'pg_wal')) / 1024 / 1024
        if enable_logs:
            log.info(
                f"LSN delta: {current_lsn - last_lsn} MB, current WAL size: {pg_wal_size} MB"
            )
        last_lsn = current_lsn
        return current_lsn, pg_wal_size
Example No. 19
def test_sync_safekeepers(zenith_env_builder: ZenithEnvBuilder, pg_bin: PgBin,
                          port_distributor: PortDistributor):

    # We don't really need the full environment for this test, just the
    # safekeepers would be enough.
    zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    timeline_id = uuid.uuid4()
    tenant_id = uuid.uuid4()

    # write config for proposer
    pgdata_dir = os.path.join(env.repo_dir, "proposer_pgdata")
    pg = ProposerPostgres(pgdata_dir, pg_bin, timeline_id, tenant_id,
                          '127.0.0.1', port_distributor.get_port())
    pg.create_dir_config(env.get_safekeeper_connstrs())

    # a valid LSN that is neither at a segment start nor in the zero segment
    epoch_start_lsn = 0x16B9188  # 0/16B9188
    begin_lsn = epoch_start_lsn

    # append and commit WAL
    lsn_after_append = []
    for i in range(3):
        res = env.safekeepers[i].append_logical_message(
            tenant_id,
            timeline_id,
            {
                "lm_prefix": "prefix",
                "lm_message": "message",
                "set_commit_lsn": True,
                "send_proposer_elected": True,
                "term": 2,
                "begin_lsn": begin_lsn,
                "epoch_start_lsn": epoch_start_lsn,
                "truncate_lsn": epoch_start_lsn,
            },
        )
        lsn_hex = lsn_to_hex(res["inserted_wal"]["end_lsn"])
        lsn_after_append.append(lsn_hex)
        log.info(f"safekeeper[{i}] lsn after append: {lsn_hex}")

    # run sync safekeepers
    lsn_after_sync = pg.sync_safekeepers()
    log.info(f"lsn after sync = {lsn_after_sync}")

    assert all(lsn_after_sync == lsn for lsn in lsn_after_append)
Example No. 20
    def start_safekeeper(self, i):
        port = SafekeeperPort(
            pg=self.port_distributor.get_port(),
            http=self.port_distributor.get_port(),
        )

        safekeeper_dir = os.path.join(self.repo_dir, f"sk{i}")
        mkdir_if_needed(safekeeper_dir)

        args = [
            self.bin_safekeeper, "-l", f"127.0.0.1:{port.pg}", "--listen-http",
            f"127.0.0.1:{port.http}", "-D", safekeeper_dir, "--id",
            str(i), "--daemonize"
        ]

        log.info(f'Running command "{" ".join(args)}"')
        return subprocess.run(args, check=True)
Example No. 21
def test_broker(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.num_safekeepers = 3
    zenith_env_builder.broker = True
    zenith_env_builder.enable_local_fs_remote_storage()
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch("test_broker", "main")
    pg = env.postgres.create_start('test_broker')
    pg.safe_psql("CREATE TABLE t(key int primary key, value text)")

    # learn zenith timeline from compute
    tenant_id = pg.safe_psql("show zenith.zenith_tenant")[0][0]
    timeline_id = pg.safe_psql("show zenith.zenith_timeline")[0][0]

    # wait until remote_consistent_lsn gets advanced on all safekeepers
    clients = [sk.http_client() for sk in env.safekeepers]
    stat_before = [
        cli.timeline_status(tenant_id, timeline_id) for cli in clients
    ]
    log.info(f"statuses is {stat_before}")

    pg.safe_psql("INSERT INTO t SELECT generate_series(1,100), 'payload'")
    # force checkpoint to advance remote_consistent_lsn
    with closing(env.pageserver.connect()) as psconn:
        with psconn.cursor() as pscur:
            pscur.execute(f"checkpoint {tenant_id} {timeline_id}")
    # and wait till remote_consistent_lsn propagates to all safekeepers
    started_at = time.time()
    while True:
        stat_after = [
            cli.timeline_status(tenant_id, timeline_id) for cli in clients
        ]
        if all(
                lsn_from_hex(s_after.remote_consistent_lsn) > lsn_from_hex(
                    s_before.remote_consistent_lsn)
                for s_after, s_before in zip(stat_after, stat_before)):
            break
        elapsed = time.time() - started_at
        if elapsed > 20:
            raise RuntimeError(
                f"timed out waiting {elapsed:.0f}s for remote_consistent_lsn propagation: status before {stat_before}, status current {stat_after}"
            )
        time.sleep(0.5)
Example No. 22
def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str:
    """ Run a process and capture its output

    Output will go to files named "cmd_NNN.stdout" and "cmd_NNN.stderr"
    where "cmd" is the name of the program and NNN is an incrementing
    counter.

    If those files already exist, we will overwrite them.
    Returns basepath for files with captured output.
    """
    assert type(cmd) is list
    base = os.path.basename(cmd[0]) + '_{}'.format(global_counter())
    basepath = os.path.join(capture_dir, base)
    stdout_filename = basepath + '.stdout'
    stderr_filename = basepath + '.stderr'

    with open(stdout_filename, 'w') as stdout_f:
        with open(stderr_filename, 'w') as stderr_f:
            log.info('(capturing output to "{}.stdout")'.format(base))
            subprocess.run(cmd, **kwargs, stdout=stdout_f, stderr=stderr_f)

    return basepath
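
For example (a hypothetical call, assuming test_output_dir is a per-test output directory), capturing a program's output leaves <cmd>_<counter>.stdout and <cmd>_<counter>.stderr files under that directory; extra keyword arguments are forwarded to subprocess.run().

# Hypothetical usage of subprocess_capture(); the command is illustrative.
basepath = subprocess_capture(test_output_dir, ['pgbench', '--version'], check=True)
log.info(f"captured output under {basepath}.stdout and {basepath}.stderr")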
Example No. 23
def test_config(zenith_simple_env: ZenithEnv):
    env = zenith_simple_env
    env.zenith_cli.create_branch("test_config", "empty")

    # change config
    pg = env.postgres.create_start('test_config',
                                   config_lines=['log_min_messages=debug1'])
    log.info('postgres is running on test_config branch')

    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute('''
                SELECT setting
                FROM pg_settings
                WHERE
                    source != 'default'
                    AND source != 'override'
                    AND name = 'log_min_messages'
            ''')

            # check that config change was applied
            assert cur.fetchone() == ('debug1', )
Example No. 24
def test_timeline_size(zenith_simple_env: ZenithEnv):
    env = zenith_simple_env
    # Create a new branch from the 'empty' timeline
    new_timeline_id = env.zenith_cli.create_branch('test_timeline_size',
                                                   'empty')

    client = env.pageserver.http_client()
    timeline_details = assert_local(client, env.initial_tenant,
                                    new_timeline_id)
    assert timeline_details['local'][
        'current_logical_size'] == timeline_details['local'][
            'current_logical_size_non_incremental']

    pgmain = env.postgres.create_start("test_timeline_size")
    log.info("postgres is running on 'test_timeline_size' branch")

    with closing(pgmain.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute("SHOW zenith.zenith_timeline")

            # Create table, and insert the first 100 rows
            cur.execute("CREATE TABLE foo (t text)")
            cur.execute("""
                INSERT INTO foo
                    SELECT 'long string to consume some space' || g
                    FROM generate_series(1, 10) g
            """)

            res = assert_local(client, env.initial_tenant, new_timeline_id)
            local_details = res['local']
            assert local_details["current_logical_size"] == local_details[
                "current_logical_size_non_incremental"]
            cur.execute("TRUNCATE foo")

            res = assert_local(client, env.initial_tenant, new_timeline_id)
            local_details = res['local']
            assert local_details["current_logical_size"] == local_details[
                "current_logical_size_non_incremental"]
Example No. 25
def test_dropdb(zenith_simple_env: ZenithEnv, test_output_dir):
    env = zenith_simple_env
    env.zenith_cli.create_branch('test_dropdb', 'empty')
    pg = env.postgres.create_start('test_dropdb')
    log.info("postgres is running on 'test_dropdb' branch")

    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute('CREATE DATABASE foodb')

            cur.execute('SELECT pg_current_wal_insert_lsn()')
            lsn_before_drop = cur.fetchone()[0]

            cur.execute("SELECT oid FROM pg_database WHERE datname='foodb';")
            dboid = cur.fetchone()[0]

    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute('DROP DATABASE foodb')

            cur.execute('CHECKPOINT')

            cur.execute('SELECT pg_current_wal_insert_lsn()')
            lsn_after_drop = cur.fetchone()[0]

    # Create two branches before and after database drop.
    env.zenith_cli.create_branch('test_before_dropdb',
                                 'test_dropdb',
                                 ancestor_start_lsn=lsn_before_drop)
    pg_before = env.postgres.create_start('test_before_dropdb')

    env.zenith_cli.create_branch('test_after_dropdb',
                                 'test_dropdb',
                                 ancestor_start_lsn=lsn_after_drop)
    pg_after = env.postgres.create_start('test_after_dropdb')

    # Test that database exists on the branch before drop
    pg_before.connect(dbname='foodb').close()

    # Test that database subdir exists on the branch before drop
    assert pg_before.pgdata_dir
    dbpath = pathlib.Path(pg_before.pgdata_dir) / 'base' / str(dboid)
    log.info(dbpath)

    assert os.path.isdir(dbpath)

    # Test that database subdir doesn't exist on the branch after drop
    assert pg_after.pgdata_dir
    dbpath = pathlib.Path(pg_after.pgdata_dir) / 'base' / str(dboid)
    log.info(dbpath)

    assert not os.path.isdir(dbpath)

    # Check that we restore the content of the datadir correctly
    check_restored_datadir_content(test_output_dir, env, pg)
Example No. 26
def new_pageserver_helper(new_pageserver_dir: pathlib.Path,
                          pageserver_bin: pathlib.Path,
                          remote_storage_mock_path: pathlib.Path, pg_port: int,
                          http_port: int):
    """
    cannot use ZenithPageserver yet because it depends on zenith cli
    which currently lacks support for multiple pageservers
    """
    cmd = [
        str(pageserver_bin),
        '--init',
        '--workdir',
        str(new_pageserver_dir),
        f"-c listen_pg_addr='localhost:{pg_port}'",
        f"-c listen_http_addr='localhost:{http_port}'",
        f"-c pg_distrib_dir='{pg_distrib_dir}'",
        f"-c id=2",
        f"-c remote_storage={{local_path='{remote_storage_mock_path}'}}",
    ]

    subprocess.check_output(cmd, text=True)

    # actually run new pageserver
    cmd = [
        str(pageserver_bin),
        '--workdir',
        str(new_pageserver_dir),
        '--daemonize',
    ]
    log.info("starting new pageserver %s", cmd)
    out = subprocess.check_output(cmd, text=True)
    log.info("started new pageserver %s", out)
    try:
        yield
    finally:
        log.info("stopping new pageserver")
        pid = int((new_pageserver_dir / 'pageserver.pid').read_text())
        os.kill(pid, signal.SIGQUIT)
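
Since this helper yields after starting the second pageserver, it is presumably wrapped with contextlib.contextmanager where it is defined; a hedged usage sketch (the ports and paths are illustrative):

# Hypothetical usage, assuming new_pageserver_helper is decorated with
# @contextlib.contextmanager (the decorator is not shown above).
with new_pageserver_helper(new_pageserver_dir,
                           pageserver_bin,
                           remote_storage_mock_path,
                           pg_port=15000,
                           http_port=15001):
    # the new pageserver is running here: attach timelines, run queries, etc.
    ...
# On exit, the helper reads pageserver.pid and sends SIGQUIT to stop it.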
Example No. 27
def test_vm_bit_clear(zenith_simple_env: ZenithEnv):
    env = zenith_simple_env

    env.zenith_cli.create_branch("test_vm_bit_clear", "empty")
    pg = env.postgres.create_start('test_vm_bit_clear')

    log.info("postgres is running on 'test_vm_bit_clear' branch")
    pg_conn = pg.connect()
    cur = pg_conn.cursor()

    # Install extension containing function needed for test
    cur.execute('CREATE EXTENSION zenith_test_utils')

    # Create a test table and freeze it to set the VM bit.
    cur.execute('CREATE TABLE vmtest_delete (id integer PRIMARY KEY)')
    cur.execute('INSERT INTO vmtest_delete VALUES (1)')
    cur.execute('VACUUM FREEZE vmtest_delete')

    cur.execute('CREATE TABLE vmtest_update (id integer PRIMARY KEY)')
    cur.execute(
        'INSERT INTO vmtest_update SELECT g FROM generate_series(1, 1000) g')
    cur.execute('VACUUM FREEZE vmtest_update')

    # DELETE and UPDATE the rows.
    cur.execute('DELETE FROM vmtest_delete WHERE id = 1')
    cur.execute('UPDATE vmtest_update SET id = 5000 WHERE id = 1')

    # Branch at this point, to test that later
    env.zenith_cli.create_branch("test_vm_bit_clear_new", "test_vm_bit_clear")

    # Clear the buffer cache, to force the VM page to be re-fetched from
    # the page server
    cur.execute('SELECT clear_buffer_cache()')

    # Check that an index-only scan doesn't see the deleted row. If the
    # clearing of the VM bit was not replayed correctly, this would incorrectly
    # return the deleted row.
    cur.execute('''
    set enable_seqscan=off;
    set enable_indexscan=on;
    set enable_bitmapscan=off;
    ''')

    cur.execute('SELECT * FROM vmtest_delete WHERE id = 1')
    assert (cur.fetchall() == [])
    cur.execute('SELECT * FROM vmtest_update WHERE id = 1')
    assert (cur.fetchall() == [])

    cur.close()

    # Check the same thing on the branch that we created right after the DELETE
    #
    # As of this writing, the code in smgrwrite() creates a full-page image whenever
    # a dirty VM page is evicted. If the VM bit was not correctly cleared by the
    # earlier WAL record, the full-page image hides the problem. Starting a new
    # server at the right point-in-time avoids that full-page image.
    pg_new = env.postgres.create_start('test_vm_bit_clear_new')

    log.info("postgres is running on 'test_vm_bit_clear_new' branch")
    pg_new_conn = pg_new.connect()
    cur_new = pg_new_conn.cursor()

    cur_new.execute('''
    set enable_seqscan=off;
    set enable_indexscan=on;
    set enable_bitmapscan=off;
    ''')

    cur_new.execute('SELECT * FROM vmtest_delete WHERE id = 1')
    assert (cur_new.fetchall() == [])
    cur_new.execute('SELECT * FROM vmtest_update WHERE id = 1')
    assert (cur_new.fetchall() == [])
Example No. 28
def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):

    # Use a safekeeper in this test to avoid a subtle race condition.
    # Without a safekeeper, walreceiver reconnection can get stuck
    # because of an IO deadlock.
    #
    # See https://github.com/zenithdb/zenith/issues/1068
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()

    # Create the main branch used by this test
    env.zenith_cli.create_branch('test_branch_behind')
    pgmain = env.postgres.create_start('test_branch_behind')
    log.info("postgres is running on 'test_branch_behind' branch")

    main_pg_conn = pgmain.connect()
    main_cur = main_pg_conn.cursor()

    main_cur.execute("SHOW zenith.zenith_timeline")
    timeline = main_cur.fetchone()[0]

    # Create table, and insert the first 100 rows
    main_cur.execute('CREATE TABLE foo (t text)')

    # remember an early LSN, to later test branch creation at an out-of-date LSN
    main_cur.execute('SELECT pg_current_wal_insert_lsn()')
    gced_lsn = main_cur.fetchone()[0]

    main_cur.execute('''
        INSERT INTO foo
            SELECT 'long string to consume some space' || g
            FROM generate_series(1, 100) g
    ''')
    main_cur.execute('SELECT pg_current_wal_insert_lsn()')
    lsn_a = main_cur.fetchone()[0]
    log.info(f'LSN after 100 rows: {lsn_a}')

    # Insert some more rows. (This generates enough WAL to fill a few segments.)
    main_cur.execute('''
        INSERT INTO foo
            SELECT 'long string to consume some space' || g
            FROM generate_series(1, 200000) g
    ''')
    main_cur.execute('SELECT pg_current_wal_insert_lsn()')
    lsn_b = main_cur.fetchone()[0]
    log.info(f'LSN after 200100 rows: {lsn_b}')

    # Branch at the point where only 100 rows were inserted
    env.zenith_cli.create_branch('test_branch_behind_hundred',
                                 'test_branch_behind',
                                 ancestor_start_lsn=lsn_a)

    # Insert many more rows. This generates enough WAL to fill a few segments.
    main_cur.execute('''
        INSERT INTO foo
            SELECT 'long string to consume some space' || g
            FROM generate_series(1, 200000) g
    ''')
    main_cur.execute('SELECT pg_current_wal_insert_lsn()')

    main_cur.execute('SELECT pg_current_wal_insert_lsn()')
    lsn_c = main_cur.fetchone()[0]
    log.info(f'LSN after 400100 rows: {lsn_c}')

    # Branch at the point where only 200100 rows were inserted
    env.zenith_cli.create_branch('test_branch_behind_more',
                                 'test_branch_behind',
                                 ancestor_start_lsn=lsn_b)

    pg_hundred = env.postgres.create_start('test_branch_behind_hundred')
    pg_more = env.postgres.create_start('test_branch_behind_more')

    # On the 'hundred' branch, we should see only 100 rows
    hundred_pg_conn = pg_hundred.connect()
    hundred_cur = hundred_pg_conn.cursor()
    hundred_cur.execute('SELECT count(*) FROM foo')
    assert hundred_cur.fetchone() == (100, )

    # On the 'more' branch, we should see 200100 rows
    more_pg_conn = pg_more.connect()
    more_cur = more_pg_conn.cursor()
    more_cur.execute('SELECT count(*) FROM foo')
    assert more_cur.fetchone() == (200100, )

    # All the rows are visible on the main branch
    main_cur.execute('SELECT count(*) FROM foo')
    assert main_cur.fetchone() == (400100, )

    # Check bad LSNs for branching

    # branch at segment boundary
    env.zenith_cli.create_branch('test_branch_segment_boundary',
                                 'test_branch_behind',
                                 ancestor_start_lsn="0/3000000")
    pg = env.postgres.create_start('test_branch_segment_boundary')
    cur = pg.connect().cursor()
    cur.execute('SELECT 1')
    assert cur.fetchone() == (1, )

    # branch at pre-initdb lsn
    with pytest.raises(Exception, match="invalid branch start lsn"):
        env.zenith_cli.create_branch('test_branch_preinitdb',
                                     ancestor_start_lsn="0/42")

    # branch at pre-ancestor lsn
    with pytest.raises(Exception, match="less than timeline ancestor lsn"):
        env.zenith_cli.create_branch('test_branch_preinitdb',
                                     'test_branch_behind',
                                     ancestor_start_lsn="0/42")

    # check that we cannot create branch based on garbage collected data
    with closing(env.pageserver.connect()) as psconn:
        with psconn.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur:
            # call gc to advance latest_gc_cutoff_lsn
            pscur.execute(f"do_gc {env.initial_tenant.hex} {timeline} 0")
            row = pscur.fetchone()
            print_gc_result(row)

    with pytest.raises(Exception, match="invalid branch start lsn"):
        # this gced_lsn is pretty random, so if gc is disabled this wouldn't fail
        env.zenith_cli.create_branch('test_branch_create_fail',
                                     'test_branch_behind',
                                     ancestor_start_lsn=gced_lsn)

    # check that after gc everything is still there
    hundred_cur.execute('SELECT count(*) FROM foo')
    assert hundred_cur.fetchone() == (100, )

    more_cur.execute('SELECT count(*) FROM foo')
    assert more_cur.fetchone() == (200100, )

    main_cur.execute('SELECT count(*) FROM foo')
    assert main_cur.fetchone() == (400100, )
Example No. 29
def test_clog_truncate(zenith_simple_env: ZenithEnv):
    env = zenith_simple_env
    env.zenith_cli.create_branch('test_clog_truncate', 'empty')

    # set aggressive autovacuum to make sure that truncation will happen
    config = [
        'autovacuum_max_workers=10', 'autovacuum_vacuum_threshold=0',
        'autovacuum_vacuum_insert_threshold=0',
        'autovacuum_vacuum_cost_delay=0', 'autovacuum_vacuum_cost_limit=10000',
        'autovacuum_naptime=1s', 'autovacuum_freeze_max_age=100000'
    ]

    pg = env.postgres.create_start('test_clog_truncate', config_lines=config)
    log.info('postgres is running on test_clog_truncate branch')

    # Install extension containing function needed for test
    pg.safe_psql('CREATE EXTENSION zenith_test_utils')

    # Consume many xids to advance clog
    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute('select test_consume_xids(1000*1000*10);')
            log.info('xids consumed')

            # call a checkpoint to trigger TruncateSubtrans
            cur.execute('CHECKPOINT;')

            # ensure WAL flush
            cur.execute('select txid_current()')
            log.info(cur.fetchone())

    # wait for autovacuum to truncate the pg_xact
    # XXX Is it worth adding a timeout here?
    pg_xact_0000_path = os.path.join(pg.pg_xact_dir_path(), '0000')
    log.info(f"pg_xact_0000_path = {pg_xact_0000_path}")

    while os.path.isfile(pg_xact_0000_path):
        log.info(f"file exists. wait for truncation. "
                 "pg_xact_0000_path = {pg_xact_0000_path}")
        time.sleep(5)

    # checkpoint to advance latest lsn
    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute('CHECKPOINT;')
            cur.execute('select pg_current_wal_insert_lsn()')
            lsn_after_truncation = cur.fetchone()[0]

    # create new branch after clog truncation and start a compute node on it
    log.info(f'create branch at lsn_after_truncation {lsn_after_truncation}')
    env.zenith_cli.create_branch('test_clog_truncate_new',
                                 'test_clog_truncate',
                                 ancestor_start_lsn=lsn_after_truncation)
    pg2 = env.postgres.create_start('test_clog_truncate_new')
    log.info('postgres is running on test_clog_truncate_new branch')

    # check that new node doesn't contain truncated segment
    pg_xact_0000_path_new = os.path.join(pg2.pg_xact_dir_path(), '0000')
    log.info(f"pg_xact_0000_path_new = {pg_xact_0000_path_new}")
    assert os.path.isfile(pg_xact_0000_path_new) is False
Example No. 30
def test_twophase(zenith_simple_env: ZenithEnv):
    env = zenith_simple_env
    env.zenith_cli.create_branch("test_twophase", "empty")
    pg = env.postgres.create_start(
        'test_twophase', config_lines=['max_prepared_transactions=5'])
    log.info("postgres is running on 'test_twophase' branch")

    conn = pg.connect()
    cur = conn.cursor()

    cur.execute('CREATE TABLE foo (t text)')

    # Prepare a transaction that will insert a row
    cur.execute('BEGIN')
    cur.execute("INSERT INTO foo VALUES ('one')")
    cur.execute("PREPARE TRANSACTION 'insert_one'")

    # Prepare another transaction that will insert a row
    cur.execute('BEGIN')
    cur.execute("INSERT INTO foo VALUES ('two')")
    cur.execute("PREPARE TRANSACTION 'insert_two'")

    # Prepare a transaction that will insert a row
    cur.execute('BEGIN')
    cur.execute("INSERT INTO foo VALUES ('three')")
    cur.execute("PREPARE TRANSACTION 'insert_three'")

    # Prepare another transaction that will insert a row
    cur.execute('BEGIN')
    cur.execute("INSERT INTO foo VALUES ('four')")
    cur.execute("PREPARE TRANSACTION 'insert_four'")

    # On checkpoint, the prepared-transaction state data is copied to
    # files in the pg_twophase directory and fsynced
    cur.execute('CHECKPOINT')

    twophase_files = os.listdir(pg.pg_twophase_dir_path())
    log.info(twophase_files)
    assert len(twophase_files) == 4

    cur.execute("COMMIT PREPARED 'insert_three'")
    cur.execute("ROLLBACK PREPARED 'insert_four'")
    cur.execute('CHECKPOINT')

    twophase_files = os.listdir(pg.pg_twophase_dir_path())
    log.info(twophase_files)
    assert len(twophase_files) == 2

    # Create a branch with the transaction in prepared state
    env.zenith_cli.create_branch("test_twophase_prepared", "test_twophase")

    # Start compute on the new branch
    pg2 = env.postgres.create_start(
        'test_twophase_prepared',
        config_lines=['max_prepared_transactions=5'],
    )

    # Check that we restored only needed twophase files
    twophase_files2 = os.listdir(pg2.pg_twophase_dir_path())
    log.info(twophase_files2)
    assert sorted(twophase_files2) == sorted(twophase_files)

    conn2 = pg2.connect()
    cur2 = conn2.cursor()

    # On the new branch, commit one of the prepared transactions,
    # abort the other one.
    cur2.execute("COMMIT PREPARED 'insert_one'")
    cur2.execute("ROLLBACK PREPARED 'insert_two'")

    cur2.execute('SELECT * FROM foo')
    assert cur2.fetchall() == [('one', ), ('three', )]

    # Only one committed insert is visible on the original branch
    cur.execute('SELECT * FROM foo')
    assert cur.fetchall() == [('three', )]