def test_createdb(zenith_simple_env: ZenithEnv):
    """Create a database, branch off afterwards, and connect to it on both branches."""
    env = zenith_simple_env
    env.zenith_cli.create_branch('test_createdb', 'empty')

    pg = env.postgres.create_start('test_createdb')
    log.info("postgres is running on 'test_createdb' branch")

    with closing(pg.connect()) as conn, conn.cursor() as cur:
        # Cause a 'relmapper' change in the original branch
        cur.execute('VACUUM FULL pg_class')

        cur.execute('CREATE DATABASE foodb')

        # Remember the WAL position right after the database was created
        cur.execute('SELECT pg_current_wal_insert_lsn()')
        branch_point = cur.fetchone()[0]

    # Branch off at the recorded LSN
    env.zenith_cli.create_branch('test_createdb2',
                                 'test_createdb',
                                 ancestor_start_lsn=branch_point)
    pg2 = env.postgres.create_start('test_createdb2')

    # The new database must accept connections on the parent and on the child
    pg.connect(dbname='foodb').close()
    pg2.connect(dbname='foodb').close()
def test_seqscans(zenith_with_baseline: PgCompare, rows: int, iters: int, workers: int):
    """Time `iters` runs of `select count(*)` over a table of `rows` rows.

    The table is asserted to be larger than shared_buffers, and its size is
    recorded as a test parameter before the timed section starts.
    """
    env = zenith_with_baseline

    with closing(env.pg.connect()) as conn, conn.cursor() as cur:
        cur.execute('create table t (i integer);')
        cur.execute(f'insert into t values (generate_series(1,{rows}));')

        # Verify that the table is larger than shared_buffers
        size_query = ''' select setting::int * pg_size_bytes(unit) as shared_buffers, pg_relation_size('t') as tbl_ize from pg_settings where name = 'shared_buffers' '''
        cur.execute(size_query)
        sizes = cur.fetchone()
        shared_buffers, table_size = sizes[0], sizes[1]
        log.info(f"shared_buffers is {shared_buffers}, table size {table_size}")
        assert int(shared_buffers) < int(table_size)
        env.zenbenchmark.record("table_size", table_size, 'bytes', MetricReport.TEST_PARAM)

        cur.execute(f"set max_parallel_workers_per_gather = {workers}")

        # Only the repeated scans are timed
        with env.record_duration('run'):
            for _ in range(iters):
                cur.execute('select count(*) from t;')
def test_subxacts(zenith_simple_env: ZenithEnv, test_output_dir):
    """Run 100 transactions of 1000 savepoints each, then check the restored datadir."""
    env = zenith_simple_env
    env.zenith_cli.create_branch("test_subxacts", "empty")
    pg = env.postgres.create_start('test_subxacts')

    log.info("postgres is running on 'test_subxacts' branch")
    conn = pg.connect()
    cur = conn.cursor()

    cur.execute(''' CREATE TABLE t1(i int, j int); ''')
    cur.execute('select pg_switch_wal();')

    # Issue 100 transactions, with 1000 subtransactions in each.
    for txn in range(100):
        cur.execute('begin')
        for sub in range(1000):
            cur.execute(f'savepoint sp{sub}')
            cur.execute(f'insert into t1 values ({txn}, {sub})')
        cur.execute('commit')

    # force wal flush
    cur.execute('checkpoint')

    # Check that we can restore the content of the datadir correctly
    check_restored_datadir_content(test_output_dir, env, pg)
def kill_safekeeper(self, sk_dir):
    """Send SIGKILL to the process whose pid is stored in `<sk_dir>/safekeeper.pid`."""
    with open(os.path.join(sk_dir, "safekeeper.pid"), "r") as pid_fh:
        pid_text = pid_fh.read()
    pid = int(pid_text)
    log.info(f"Killing safekeeper with pid {pid}")
    os.kill(pid, signal.SIGKILL)
def wait_for_pageserver_catchup(pgmain: Postgres, polling_interval=1, timeout=60):
    """Block until `received_lsn_lag` reported by `backpressure_lsns()` is no longer positive.

    Each poll opens a fresh connection to `pgmain`, logs the cluster size and the
    current lag, and sleeps `polling_interval` seconds only if another poll is needed.

    Raises:
        RuntimeError: if more than `timeout` seconds have elapsed when a poll is
            about to start.
    """
    started_at = time.time()

    while True:
        elapsed = time.time() - started_at
        if elapsed > timeout:
            raise RuntimeError(
                "timed out waiting for pageserver to reach pg_current_wal_flush_lsn()")

        with closing(pgmain.connect()) as conn, conn.cursor() as cur:
            cur.execute(''' select pg_size_pretty(pg_cluster_size()), pg_wal_lsn_diff(pg_current_wal_flush_lsn(),received_lsn) as received_lsn_lag FROM backpressure_lsns(); ''')
            res = cur.fetchone()
            log.info(f"pg_cluster_size = {res[0]}, received_lsn_lag = {res[1]}")
            received_lsn_lag = res[1]

        # Return as soon as the lag is gone instead of sleeping one more interval.
        if not received_lsn_lag > 0:
            return

        time.sleep(polling_interval)
def test_gc_aggressive(zenith_simple_env: ZenithEnv):
    """Populate a table, run `update_and_gc`, then verify row count and counter sum."""
    env = zenith_simple_env
    env.zenith_cli.create_branch("test_gc_aggressive", "empty")
    pg = env.postgres.create_start('test_gc_aggressive')
    log.info('postgres is running on test_gc_aggressive branch')

    conn = pg.connect()
    cur = conn.cursor()

    cur.execute("SHOW zenith.zenith_timeline")
    timeline = cur.fetchone()[0]

    # Create the table and fill it with num_rows rows, then index it
    cur.execute('CREATE TABLE foo (id int, counter int, t text)')
    populate_sql = f''' INSERT INTO foo SELECT g, 0, 'long string to consume some space' || g FROM generate_series(1, {num_rows}) g '''
    cur.execute(populate_sql)
    cur.execute('CREATE INDEX ON foo(id)')

    asyncio.run(update_and_gc(env, pg, timeline))

    cur.execute('SELECT COUNT(*), SUM(counter) FROM foo')
    totals = cur.fetchone()
    assert totals == (num_rows, updates_to_perform)
def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Event):
    """Insert rows into table `load` in a loop until `stop_event` is set.

    A failed insert clears `load_ok_event`. The first successful insert after a
    failure re-counts the rows, asserts the count is at most one larger than the
    number of acknowledged inserts, and sets `load_ok_event` again.
    """
    log.info("load started")

    inserted_ctr = 0
    failed = False
    while not stop_event.is_set():
        try:
            with pg_cur(pg) as cur:
                cur.execute("INSERT INTO load VALUES ('some payload')")
                inserted_ctr += 1
        # Catch Exception rather than everything: a bare `except:` would also
        # swallow SystemExit/KeyboardInterrupt and hide what actually went wrong.
        except Exception as e:
            if not failed:
                log.info("load failed: %s", e)
            failed = True
            load_ok_event.clear()
        else:
            if failed:
                with pg_cur(pg) as cur:
                    # if we recovered after failure verify that we have correct number of rows
                    log.info("recovering at %s", inserted_ctr)
                    cur.execute("SELECT count(*) FROM load")
                    # it seems that sometimes transaction gets committed before we can acknowledge
                    # the result, so sometimes selected value is larger by one than we expect
                    assert cur.fetchone()[0] - inserted_ctr <= 1
                    log.info("successfully recovered %s", inserted_ctr)
                    failed = False
                    load_ok_event.set()
    log.info('load thread stopped')
def test_createuser(zenith_simple_env: ZenithEnv):
    """Create a user, branch after a checkpoint, and query `current_user` on the new branch."""
    env = zenith_simple_env
    env.zenith_cli.create_branch('test_createuser', 'empty')

    pg = env.postgres.create_start('test_createuser')
    log.info("postgres is running on 'test_createuser' branch")

    with closing(pg.connect()) as conn, conn.cursor() as cur:
        # Create the user on the original branch
        cur.execute('CREATE USER testuser with password %s', ('testpwd', ))

        cur.execute('CHECKPOINT')

        cur.execute('SELECT pg_current_wal_insert_lsn()')
        branch_point = cur.fetchone()[0]

    # Branch off at the recorded LSN
    env.zenith_cli.create_branch('test_createuser2',
                                 'test_createuser',
                                 ancestor_start_lsn=branch_point)
    pg2 = env.postgres.create_start('test_createuser2')

    # The new branch must report the new user as current_user
    # NOTE(review): the `user` value looks masked in this copy of the file — confirm
    reported = pg2.safe_psql('select current_user', user='******')
    assert reported == [('testuser', )]
def test_backpressure_received_lsn_lag(zenith_env_builder: ZenithEnvBuilder):
    """Insert rows under a slowed-down walreceiver while a checker thread runs.

    Compute is started with `max_replication_write_lag=30MB` and the pageserver
    gets a `walreceiver-after-ingest=sleep(20)` failpoint. Up to 2,000,000 rows
    are inserted in batches of 100,000 while `check_backpressure` runs in a
    background thread. The test fails if an INSERT raises, or if the checker
    thread is no longer alive once the inserts are done.
    """
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()
    # Create a branch for us
    env.zenith_cli.create_branch('test_backpressure')

    pg = env.postgres.create_start(
        'test_backpressure', config_lines=['max_replication_write_lag=30MB'])
    log.info("postgres is running on 'test_backpressure' branch")

    # setup check thread
    check_stop_event = threading.Event()
    check_thread = threading.Thread(target=check_backpressure, args=(pg, check_stop_event))
    check_thread.start()

    # Configure failpoint to slow down walreceiver ingest
    with closing(env.pageserver.connect()) as psconn:
        with psconn.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur:
            pscur.execute("failpoints walreceiver-after-ingest=sleep(20)")

    # FIXME
    # Wait for the check thread to start
    #
    # Now if load starts too soon,
    # check thread cannot auth, because it is not able to connect to the database
    # because of the lag and waiting for lsn to replay to arrive.
    time.sleep(2)

    with pg_cur(pg) as cur:
        # Create and initialize test table
        cur.execute("CREATE TABLE foo(x bigint)")

        inserts_to_do = 2000000
        rows_inserted = 0

        # The loop also ends early if the check thread exits on its own.
        while check_thread.is_alive() and rows_inserted < inserts_to_do:
            try:
                cur.execute(
                    "INSERT INTO foo select from generate_series(1, 100000)")
                rows_inserted += 100000
            except Exception as e:
                # Either way the test fails; the state of the check thread only
                # decides which failure message is reported.
                if check_thread.is_alive():
                    log.info('stopping check thread')
                    check_stop_event.set()
                    check_thread.join()
                    assert False, f"Exception {e} while inserting rows, but WAL lag is within configured threshold. That means backpressure is not tuned properly"
                else:
                    assert False, f"Exception {e} while inserting rows and WAL lag overflowed configured threshold. That means backpressure doesn't work."

        log.info(f"inserted {rows_inserted} rows")

    # A check thread that is still alive here is stopped and joined; one that
    # has already exited fails the test.
    if check_thread.is_alive():
        log.info('stopping check thread')
        check_stop_event.set()
        check_thread.join()
        log.info('check thread stopped')
    else:
        assert False, "WAL lag overflowed configured threshold. That means backpressure doesn't work."
def show_statuses(safekeepers: List[Safekeeper], tenant_id: str, timeline_id: str):
    """Log the timeline status of every safekeeper; failures are logged, not raised."""
    for keeper in safekeepers:
        client = keeper.http_client()
        try:
            timeline_state = client.timeline_status(tenant_id, timeline_id)
            log.info(f"Safekeeper {keeper.id} status: {timeline_state}")
        except Exception as err:
            # Best effort: one safekeeper failing must not stop the others from reporting
            log.info(f"Safekeeper {keeper.id} status error: {err}")
def __exit__(self, exc_type, exc_value, traceback):
    """Stop the compute node and kill every safekeeper when leaving the context."""
    log.info('Cleaning up all safekeeper and compute nodes')

    # Stop all the nodes
    compute = self.postgres
    if compute is not None:
        compute.stop()

    launched = self.safekeepers
    if launched is None:
        return
    for sk_proc in launched:
        # NOTE(review): args[6] is presumably the data directory from the launch
        # command line — verify against start_safekeeper's argument order
        sk_dir = sk_proc.args[6]
        self.kill_safekeeper(sk_dir)
def test_broken(zenith_simple_env: ZenithEnv, pg_bin):
    """Start postgres on a fresh branch, then run a pgbench command announced as failing."""
    env = zenith_simple_env
    env.zenith_cli.create_branch("test_broken", "empty")
    env.postgres.create_start("test_broken")
    log.info('postgres is running')

    log.info('THIS NEXT COMMAND WILL FAIL:')
    bogus_command = 'pgbench -i_am_a_broken_test'.split()
    pg_bin.run(bogus_command)
def test_wal_deleted_after_broadcast(zenith_env_builder: ZenithEnvBuilder):
    """Generate WAL with three safekeepers, checkpoint, and assert pg_wal stays below 40 MB."""
    # LSN (in MB) seen by the previous collect_stats call, used to log the delta
    last_lsn = 0.0

    def collect_stats(pg: Postgres, cur, enable_logs=True):
        """Return (current LSN, pg_wal directory size), both in MB."""
        nonlocal last_lsn
        assert pg.pgdata_dir is not None

        log.info('executing INSERT to generate WAL')
        cur.execute("select pg_current_wal_lsn()")
        lsn_mb = lsn_from_hex(cur.fetchone()[0]) / 1024 / 1024
        wal_dir = os.path.join(pg.pgdata_dir, 'pg_wal')
        wal_mb = get_dir_size(wal_dir) / 1024 / 1024
        if enable_logs:
            log.info(f"LSN delta: {lsn_mb - last_lsn} MB, current WAL size: {wal_mb} MB")
        last_lsn = lsn_mb
        return lsn_mb, wal_mb

    def generate_wal(cur):
        """Insert 300000 rows: about ~20MB of WAL, at least one new segment."""
        cur.execute("INSERT INTO t SELECT generate_series(1,300000), 'payload'")

    zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_wal_deleted_after_broadcast')
    # Adjust checkpoint config to prevent keeping old WAL segments
    checkpoint_settings = ['min_wal_size=32MB', 'max_wal_size=32MB', 'log_checkpoints=on']
    pg = env.postgres.create_start('test_wal_deleted_after_broadcast',
                                   config_lines=checkpoint_settings)

    pg_conn = pg.connect()
    cur = pg_conn.cursor()
    cur.execute('CREATE TABLE t(key int, value text)')

    collect_stats(pg, cur)

    # generate WAL to simulate normal workload
    for _ in range(5):
        generate_wal(cur)
        collect_stats(pg, cur)

    log.info('executing checkpoint')
    cur.execute('CHECKPOINT')
    _, wal_size_after_checkpoint = collect_stats(pg, cur)

    # there shouldn't be more than 2 WAL segments (but dir may have archive_status files)
    assert wal_size_after_checkpoint < 16 * 2.5
def collect_metrics(message: str) -> List[TimelineMetrics]:
    """Gather LSNs per timeline from pageserver and safekeepers, and check their ordering.

    Timelines without a 'local' section are skipped. For the rest it asserts
    commit_lsn <= flush_lsn on every safekeeper, and that a strict majority of
    safekeepers have flush and commit LSNs at or beyond the pageserver's
    last_record_lsn. The collected metrics are logged with `message` and returned.
    """
    with env.pageserver.http_client() as ps_http:
        details_per_branch = [
            ps_http.timeline_detail(tenant_id=tenant_id,
                                    timeline_id=branch_names_to_timeline_ids[name])
            for name in branch_names
        ]
    # All changes visible to pageserver (last_record_lsn) should be
    # confirmed by safekeepers first. As we cannot atomically get
    # state of both pageserver and safekeepers, we should start with
    # pageserver. Looking at outdated data from pageserver is ok.
    # Asking safekeepers first is not ok because new commits may arrive
    # to both safekeepers and pageserver after we've already obtained
    # safekeepers' state, it will look contradictory.
    sk_metrics = [sk.http_client().get_metrics() for sk in env.safekeepers]

    collected = []
    for timeline_detail in details_per_branch:
        timeline_id: str = timeline_detail["timeline_id"]

        local_part = timeline_detail.get('local')
        if local_part is None:
            log.debug(f"Timeline {timeline_id} is not present locally, skipping")
            continue

        m = TimelineMetrics(
            timeline_id=timeline_id,
            last_record_lsn=lsn_from_hex(local_part['last_record_lsn']),
        )
        lookup_key = (tenant_id.hex, timeline_id)
        for per_sk in sk_metrics:
            m.flush_lsns.append(per_sk.flush_lsn_inexact[lookup_key])
            m.commit_lsns.append(per_sk.commit_lsn_inexact[lookup_key])

        for flush_lsn, commit_lsn in zip(m.flush_lsns, m.commit_lsns):
            # Invariant. May be < when transaction is in progress.
            assert commit_lsn <= flush_lsn, f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"

        # We only call collect_metrics() after a transaction is confirmed by
        # the compute node, which only happens after a consensus of safekeepers
        # has confirmed the transaction. We assume majority consensus here.
        quorum_size = zenith_env_builder.num_safekeepers
        assert (
            2 * sum(m.last_record_lsn <= lsn for lsn in m.flush_lsns) > quorum_size
        ), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
        assert (
            2 * sum(m.last_record_lsn <= lsn for lsn in m.commit_lsns) > quorum_size
        ), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
        collected.append(m)

    log.info(f"{message}: {collected}")
    return collected
def test_parallel_copy(zenith_simple_env: ZenithEnv, n_parallel=5):
    """Create the `copytest` table and run `parallel_load_same_table` with `n_parallel`."""
    env = zenith_simple_env
    env.zenith_cli.create_branch("test_parallel_copy", "empty")
    pg = env.postgres.create_start('test_parallel_copy')
    log.info("postgres is running on 'test_parallel_copy' branch")

    # Create test table
    conn = pg.connect()
    cur = conn.cursor()
    cur.execute('CREATE TABLE copytest (i int, t text)')

    # Load the table over parallel connections
    load_coro = parallel_load_same_table(pg, n_parallel)
    asyncio.run(load_coro)
def test_pageserver_restart(zenith_env_builder: ZenithEnvBuilder):
    """Restart the pageserver and verify that the table contents are still readable."""
    # One safekeeper is enough for this test.
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch('test_pageserver_restart')
    pg = env.postgres.create_start('test_pageserver_restart')

    conn = pg.connect()
    cur = conn.cursor()

    # The table has to exceed shared_buffers; otherwise the SELECT after the
    # restart would be answered from shared_buffers without touching the page
    # server, which defeats the point of this test.
    cur.execute('CREATE TABLE foo (t text)')
    cur.execute(''' INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 100000) g ''')

    # Verify that the table is larger than shared_buffers
    cur.execute(''' select setting::int * pg_size_bytes(unit) as shared_buffers, pg_relation_size('foo') as tbl_ize from pg_settings where name = 'shared_buffers' ''')
    sizes = cur.fetchone()
    log.info(f"shared_buffers is {sizes[0]}, table size {sizes[1]}")
    assert int(sizes[0]) < int(sizes[1])

    # Stop and restart pageserver. This is a more or less graceful shutdown, although
    # the page server doesn't currently have a shutdown routine so there's no difference
    # between stopping and crashing.
    pageserver = env.pageserver
    pageserver.stop()
    pageserver.start()

    # The restart broke the backend's connection to the page server, so the next
    # query on the old connection would fail; open a new postgres connection.
    # (Ideally, the compute node would handle that and retry internally, without
    # propagating the error to the user, but currently it doesn't...)
    conn = pg.connect()
    cur = conn.cursor()

    cur.execute("SELECT count(*) FROM foo")
    assert cur.fetchone() == (100000, )

    # Stop and restart the page server once more
    # NOTE(review): this comment used to say "by force", but stop() is called
    # with the same arguments as above — confirm the intent
    env.pageserver.stop()
    env.pageserver.start()
def test_zenith_regress(zenith_simple_env: ZenithEnv, test_output_dir, pg_bin, capsys):
    """Run pg_regress with the zenith-specific schedule, then check the restored datadir."""
    env = zenith_simple_env

    env.zenith_cli.create_branch("test_zenith_regress", "empty")
    # Connect to postgres and create a database called "regression".
    pg = env.postgres.create_start('test_zenith_regress')
    pg.safe_psql('CREATE DATABASE regression')

    # Create some local directories for pg_regress to run in.
    runpath = os.path.join(test_output_dir, 'regress')
    for scratch_dir in (runpath, os.path.join(runpath, 'testtablespace')):
        mkdir_if_needed(scratch_dir)

    # Compute all the file locations that pg_regress will need.
    # This test runs zenith specific tests
    build_path = os.path.join(pg_distrib_dir, 'build/src/test/regress')
    src_path = os.path.join(base_dir, 'test_runner/zenith_regress')
    bindir = os.path.join(pg_distrib_dir, 'bin')
    schedule = os.path.join(src_path, 'parallel_schedule')

    pg_regress_command = [
        os.path.join(build_path, 'pg_regress'),
        '--use-existing',
        f'--bindir={bindir}',
        f'--dlpath={build_path}',
        f'--schedule={schedule}',
        f'--inputdir={src_path}',
    ]

    log.info(pg_regress_command)
    env_vars = {
        'PGPORT': str(pg.default_options['port']),
        'PGUSER': pg.default_options['user'],
        'PGHOST': pg.default_options['host'],
    }

    # Run the command.
    # We don't capture the output. It's not too chatty, and it always
    # logs the exact same data to `regression.out` anyway.
    with capsys.disabled():
        pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath)

        # checkpoint one more time to ensure that the lsn we get is the latest one
        pg.safe_psql('CHECKPOINT')
        latest_lsn = pg.safe_psql('select pg_current_wal_insert_lsn()')[0][0]

        # Check that we restore the content of the datadir correctly
        check_restored_datadir_content(test_output_dir, env, pg)
def collect_stats(pg: Postgres, cur, enable_logs=True): nonlocal last_lsn assert pg.pgdata_dir is not None log.info('executing INSERT to generate WAL') cur.execute("select pg_current_wal_lsn()") current_lsn = lsn_from_hex(cur.fetchone()[0]) / 1024 / 1024 pg_wal_size = get_dir_size(os.path.join(pg.pgdata_dir, 'pg_wal')) / 1024 / 1024 if enable_logs: log.info( f"LSN delta: {current_lsn - last_lsn} MB, current WAL size: {pg_wal_size} MB" ) last_lsn = current_lsn return current_lsn, pg_wal_size
def test_sync_safekeepers(zenith_env_builder: ZenithEnvBuilder,
                          pg_bin: PgBin,
                          port_distributor: PortDistributor):
    """Append the same message to three safekeepers, then check `sync_safekeepers` agrees."""
    # We don't really need the full environment for this test, just the
    # safekeepers would be enough.
    zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()

    timeline_id = uuid.uuid4()
    tenant_id = uuid.uuid4()

    # write config for proposer
    pgdata_dir = os.path.join(env.repo_dir, "proposer_pgdata")
    proposer_port = port_distributor.get_port()
    pg = ProposerPostgres(pgdata_dir, pg_bin, timeline_id, tenant_id, '127.0.0.1', proposer_port)
    pg.create_dir_config(env.get_safekeeper_connstrs())

    # valid lsn, which is not in the segment start, nor in zero segment
    epoch_start_lsn = 0x16B9188  # 0/16B9188
    begin_lsn = epoch_start_lsn

    # append and commit WAL
    lsn_after_append = []
    for i in range(3):
        request = {
            "lm_prefix": "prefix",
            "lm_message": "message",
            "set_commit_lsn": True,
            "send_proposer_elected": True,
            "term": 2,
            "begin_lsn": begin_lsn,
            "epoch_start_lsn": epoch_start_lsn,
            "truncate_lsn": epoch_start_lsn,
        }
        res = env.safekeepers[i].append_logical_message(tenant_id, timeline_id, request)
        lsn_hex = lsn_to_hex(res["inserted_wal"]["end_lsn"])
        lsn_after_append.append(lsn_hex)
        log.info(f"safekeeper[{i}] lsn after append: {lsn_hex}")

    # run sync safekeepers
    lsn_after_sync = pg.sync_safekeepers()
    log.info(f"lsn after sync = {lsn_after_sync}")

    assert all(lsn_after_sync == appended for appended in lsn_after_append)
def start_safekeeper(self, i):
    """Launch safekeeper `i` with `--daemonize` in `<repo_dir>/sk<i>`.

    Returns the result of `subprocess.run` for the launch command.
    """
    pg_port = self.port_distributor.get_port()
    http_port = self.port_distributor.get_port()
    port = SafekeeperPort(pg=pg_port, http=http_port)

    safekeeper_dir = os.path.join(self.repo_dir, f"sk{i}")
    mkdir_if_needed(safekeeper_dir)

    args = [self.bin_safekeeper]
    args += ["-l", f"127.0.0.1:{port.pg}"]
    args += ["--listen-http", f"127.0.0.1:{port.http}"]
    args += ["-D", safekeeper_dir]
    args += ["--id", str(i)]
    args += ["--daemonize"]

    log.info(f'Running command "{" ".join(args)}"')
    return subprocess.run(args, check=True)
def test_broker(zenith_env_builder: ZenithEnvBuilder):
    """Check that remote_consistent_lsn advances on all safekeepers within 20 seconds."""
    zenith_env_builder.num_safekeepers = 3
    zenith_env_builder.broker = True
    zenith_env_builder.enable_local_fs_remote_storage()
    env = zenith_env_builder.init_start()

    env.zenith_cli.create_branch("test_broker", "main")
    pg = env.postgres.create_start('test_broker')
    pg.safe_psql("CREATE TABLE t(key int primary key, value text)")

    # learn zenith timeline from compute
    tenant_id = pg.safe_psql("show zenith.zenith_tenant")[0][0]
    timeline_id = pg.safe_psql("show zenith.zenith_timeline")[0][0]

    # Record the starting status of every safekeeper
    clients = [sk.http_client() for sk in env.safekeepers]
    stat_before = [client.timeline_status(tenant_id, timeline_id) for client in clients]
    log.info(f"statuses is {stat_before}")

    pg.safe_psql("INSERT INTO t SELECT generate_series(1,100), 'payload'")

    # force checkpoint to advance remote_consistent_lsn
    with closing(env.pageserver.connect()) as psconn, psconn.cursor() as pscur:
        pscur.execute(f"checkpoint {tenant_id} {timeline_id}")

    # and wait till remote_consistent_lsn propagates to all safekeepers
    started_at = time.time()
    while True:
        stat_after = [client.timeline_status(tenant_id, timeline_id) for client in clients]
        advanced_everywhere = all(
            lsn_from_hex(now.remote_consistent_lsn) > lsn_from_hex(before.remote_consistent_lsn)
            for now, before in zip(stat_after, stat_before))
        if advanced_everywhere:
            break
        elapsed = time.time() - started_at
        if elapsed > 20:
            raise RuntimeError(
                f"timed out waiting {elapsed:.0f}s for remote_consistent_lsn propagation: status before {stat_before}, status current {stat_after}"
            )
        time.sleep(0.5)
def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str:
    """
    Run a process, sending its stdout and stderr to files in `capture_dir`.

    The files are named "cmd_NNN.stdout" and "cmd_NNN.stderr", where "cmd" is
    the basename of the program and NNN comes from `global_counter()`.
    Existing files with those names are overwritten.

    Returns the common base path of the two files (without extension).
    """
    assert type(cmd) is list
    base = f'{os.path.basename(cmd[0])}_{global_counter()}'
    basepath = os.path.join(capture_dir, base)
    stdout_filename = f'{basepath}.stdout'
    stderr_filename = f'{basepath}.stderr'

    with open(stdout_filename, 'w') as stdout_f, open(stderr_filename, 'w') as stderr_f:
        log.info(f'(capturing output to "{base}.stdout")')
        subprocess.run(cmd, **kwargs, stdout=stdout_f, stderr=stderr_f)

    return basepath
def test_config(zenith_simple_env: ZenithEnv):
    """Start postgres with `log_min_messages=debug1` and verify the setting took effect."""
    env = zenith_simple_env
    env.zenith_cli.create_branch("test_config", "empty")

    # change config
    overrides = ['log_min_messages=debug1']
    pg = env.postgres.create_start('test_config', config_lines=overrides)
    log.info('postgres is running on test_config branch')

    settings_query = ''' SELECT setting FROM pg_settings WHERE source != 'default' AND source != 'override' AND name = 'log_min_messages' '''
    with closing(pg.connect()) as conn, conn.cursor() as cur:
        cur.execute(settings_query)

        # check that config change was applied
        assert cur.fetchone() == ('debug1', )
def test_timeline_size(zenith_simple_env: ZenithEnv):
    """Check that incremental and non-incremental logical sizes agree.

    The comparison is made right after branch creation, after inserting rows,
    and after truncating the table.
    """
    env = zenith_simple_env
    new_timeline_id = env.zenith_cli.create_branch('test_timeline_size', 'empty')

    client = env.pageserver.http_client()
    fresh = assert_local(client, env.initial_tenant, new_timeline_id)['local']
    assert fresh['current_logical_size'] == fresh['current_logical_size_non_incremental']

    pgmain = env.postgres.create_start("test_timeline_size")
    log.info("postgres is running on 'test_timeline_size' branch")

    with closing(pgmain.connect()) as conn, conn.cursor() as cur:
        cur.execute("SHOW zenith.zenith_timeline")

        # Create table, and insert 10 rows
        cur.execute("CREATE TABLE foo (t text)")
        cur.execute(""" INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 10) g """)

        after_insert = assert_local(client, env.initial_tenant, new_timeline_id)['local']
        assert after_insert["current_logical_size"] == after_insert[
            "current_logical_size_non_incremental"]

        cur.execute("TRUNCATE foo")

        after_truncate = assert_local(client, env.initial_tenant, new_timeline_id)['local']
        assert after_truncate["current_logical_size"] == after_truncate[
            "current_logical_size_non_incremental"]
def test_dropdb(zenith_simple_env: ZenithEnv, test_output_dir):
    """Branch before and after DROP DATABASE and check the database directory on each."""
    env = zenith_simple_env
    env.zenith_cli.create_branch('test_dropdb', 'empty')
    pg = env.postgres.create_start('test_dropdb')
    log.info("postgres is running on 'test_dropdb' branch")

    with closing(pg.connect()) as conn, conn.cursor() as cur:
        cur.execute('CREATE DATABASE foodb')

        cur.execute('SELECT pg_current_wal_insert_lsn()')
        lsn_before_drop = cur.fetchone()[0]

        cur.execute("SELECT oid FROM pg_database WHERE datname='foodb';")
        dboid = cur.fetchone()[0]

    with closing(pg.connect()) as conn, conn.cursor() as cur:
        cur.execute('DROP DATABASE foodb')

        cur.execute('CHECKPOINT')

        cur.execute('SELECT pg_current_wal_insert_lsn()')
        lsn_after_drop = cur.fetchone()[0]

    # Create two branches before and after database drop.
    env.zenith_cli.create_branch('test_before_dropdb',
                                 'test_dropdb',
                                 ancestor_start_lsn=lsn_before_drop)
    pg_before = env.postgres.create_start('test_before_dropdb')

    env.zenith_cli.create_branch('test_after_dropdb',
                                 'test_dropdb',
                                 ancestor_start_lsn=lsn_after_drop)
    pg_after = env.postgres.create_start('test_after_dropdb')

    # Test that database exists on the branch before drop
    pg_before.connect(dbname='foodb').close()

    # Test that database subdir exists on the branch before drop
    assert pg_before.pgdata_dir
    dir_before = pathlib.Path(pg_before.pgdata_dir) / 'base' / str(dboid)
    log.info(dir_before)
    assert os.path.isdir(dir_before)

    # Test that database subdir doesn't exist on the branch after drop
    assert pg_after.pgdata_dir
    dir_after = pathlib.Path(pg_after.pgdata_dir) / 'base' / str(dboid)
    log.info(dir_after)
    assert not os.path.isdir(dir_after)

    # Check that we restore the content of the datadir correctly
    check_restored_datadir_content(test_output_dir, env, pg)
def new_pageserver_helper(new_pageserver_dir: pathlib.Path,
                          pageserver_bin: pathlib.Path,
                          remote_storage_mock_path: pathlib.Path,
                          pg_port: int,
                          http_port: int):
    """
    Initialise and start a second pageserver, yield once, then send it SIGQUIT.

    ZenithPageserver cannot be used here yet because it depends on zenith cli,
    which currently lacks support for multiple pageservers.
    """
    binary = str(pageserver_bin)
    workdir = str(new_pageserver_dir)

    init_cmd = [
        binary,
        '--init',
        '--workdir',
        workdir,
        f"-c listen_pg_addr='localhost:{pg_port}'",
        f"-c listen_http_addr='localhost:{http_port}'",
        f"-c pg_distrib_dir='{pg_distrib_dir}'",
        "-c id=2",
        f"-c remote_storage={{local_path='{remote_storage_mock_path}'}}",
    ]
    subprocess.check_output(init_cmd, text=True)

    # actually run new pageserver
    cmd = [binary, '--workdir', workdir, '--daemonize']
    log.info("starting new pageserver %s", cmd)
    out = subprocess.check_output(cmd, text=True)
    log.info("started new pageserver %s", out)
    try:
        yield
    finally:
        log.info("stopping new pageserver")
        pid_file = new_pageserver_dir / 'pageserver.pid'
        pid = int(pid_file.read_text())
        os.kill(pid, signal.SIGQUIT)
def test_vm_bit_clear(zenith_simple_env: ZenithEnv):
    """Check that deleted/updated rows stay invisible to index scans after VACUUM FREEZE.

    The check runs on the original branch after clearing the buffer cache, and
    again on a branch created right after the DELETE and UPDATE.
    """
    env = zenith_simple_env

    env.zenith_cli.create_branch("test_vm_bit_clear", "empty")
    pg = env.postgres.create_start('test_vm_bit_clear')

    log.info("postgres is running on 'test_vm_bit_clear' branch")
    pg_conn = pg.connect()
    cur = pg_conn.cursor()

    # Install extension containing function needed for test
    cur.execute('CREATE EXTENSION zenith_test_utils')

    # Create a test table and freeze it to set the VM bit.
    cur.execute('CREATE TABLE vmtest_delete (id integer PRIMARY KEY)')
    cur.execute('INSERT INTO vmtest_delete VALUES (1)')
    cur.execute('VACUUM FREEZE vmtest_delete')

    cur.execute('CREATE TABLE vmtest_update (id integer PRIMARY KEY)')
    cur.execute('INSERT INTO vmtest_update SELECT g FROM generate_series(1, 1000) g')
    cur.execute('VACUUM FREEZE vmtest_update')

    # DELETE and UPDATE the rows.
    cur.execute('DELETE FROM vmtest_delete WHERE id = 1')
    cur.execute('UPDATE vmtest_update SET id = 5000 WHERE id = 1')

    # Branch at this point, to test that later
    env.zenith_cli.create_branch("test_vm_bit_clear_new", "test_vm_bit_clear")

    # Clear the buffer cache, to force the VM page to be re-fetched from
    # the page server
    cur.execute('SELECT clear_buffer_cache()')

    # Check that an index-only scan doesn't see the deleted row. If the
    # clearing of the VM bit was not replayed correctly, this would incorrectly
    # return deleted row.
    scan_settings = ''' set enable_seqscan=off; set enable_indexscan=on; set enable_bitmapscan=off; '''
    cur.execute(scan_settings)

    cur.execute('SELECT * FROM vmtest_delete WHERE id = 1')
    assert cur.fetchall() == []
    cur.execute('SELECT * FROM vmtest_update WHERE id = 1')
    assert cur.fetchall() == []

    cur.close()

    # Check the same thing on the branch that we created right after the DELETE
    #
    # As of this writing, the code in smgrwrite() creates a full-page image whenever
    # a dirty VM page is evicted. If the VM bit was not correctly cleared by the
    # earlier WAL record, the full-page image hides the problem. Starting a new
    # server at the right point-in-time avoids that full-page image.
    pg_new = env.postgres.create_start('test_vm_bit_clear_new')

    log.info("postgres is running on 'test_vm_bit_clear_new' branch")
    pg_new_conn = pg_new.connect()
    cur_new = pg_new_conn.cursor()

    cur_new.execute(scan_settings)

    cur_new.execute('SELECT * FROM vmtest_delete WHERE id = 1')
    assert cur_new.fetchall() == []
    cur_new.execute('SELECT * FROM vmtest_update WHERE id = 1')
    assert cur_new.fetchall() == []
def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
    """Branch at earlier LSNs of a timeline and verify what each branch can see.

    Covers: branches at the 100-row and 200100-row points, a branch at a segment
    boundary, rejected branch points (pre-initdb, pre-ancestor, garbage-collected),
    and row counts on every branch after a GC run.
    """
    # Use safekeeper in this test to avoid a subtle race condition.
    # Without safekeeper, walreceiver reconnection can stuck
    # because of IO deadlock.
    #
    # See https://github.com/zenithdb/zenith/issues/1068
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()

    # Create the branch that all later branches are taken from
    env.zenith_cli.create_branch('test_branch_behind')
    pgmain = env.postgres.create_start('test_branch_behind')
    log.info("postgres is running on 'test_branch_behind' branch")

    main_pg_conn = pgmain.connect()
    main_cur = main_pg_conn.cursor()

    main_cur.execute("SHOW zenith.zenith_timeline")
    timeline = main_cur.fetchone()[0]

    # Create table
    main_cur.execute('CREATE TABLE foo (t text)')

    # keep some early lsn to test branch creation on out of date lsn
    main_cur.execute('SELECT pg_current_wal_insert_lsn()')
    gced_lsn = main_cur.fetchone()[0]

    # Insert the first 100 rows
    main_cur.execute(''' INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 100) g ''')
    main_cur.execute('SELECT pg_current_wal_insert_lsn()')
    lsn_a = main_cur.fetchone()[0]
    log.info(f'LSN after 100 rows: {lsn_a}')

    # Insert some more rows. (This generates enough WAL to fill a few segments.)
    main_cur.execute(''' INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 200000) g ''')
    main_cur.execute('SELECT pg_current_wal_insert_lsn()')
    lsn_b = main_cur.fetchone()[0]
    log.info(f'LSN after 200100 rows: {lsn_b}')

    # Branch at the point where only 100 rows were inserted
    env.zenith_cli.create_branch('test_branch_behind_hundred',
                                 'test_branch_behind',
                                 ancestor_start_lsn=lsn_a)

    # Insert many more rows. This generates enough WAL to fill a few segments.
    main_cur.execute(''' INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 200000) g ''')
    # One query is enough here; the original issued this statement twice in a row.
    main_cur.execute('SELECT pg_current_wal_insert_lsn()')
    lsn_c = main_cur.fetchone()[0]
    log.info(f'LSN after 400100 rows: {lsn_c}')

    # Branch at the point where only 200100 rows were inserted
    env.zenith_cli.create_branch('test_branch_behind_more',
                                 'test_branch_behind',
                                 ancestor_start_lsn=lsn_b)

    pg_hundred = env.postgres.create_start('test_branch_behind_hundred')
    pg_more = env.postgres.create_start('test_branch_behind_more')

    # On the 'hundred' branch, we should see only 100 rows
    hundred_pg_conn = pg_hundred.connect()
    hundred_cur = hundred_pg_conn.cursor()
    hundred_cur.execute('SELECT count(*) FROM foo')
    assert hundred_cur.fetchone() == (100, )

    # On the 'more' branch, we should see 200100 rows
    more_pg_conn = pg_more.connect()
    more_cur = more_pg_conn.cursor()
    more_cur.execute('SELECT count(*) FROM foo')
    assert more_cur.fetchone() == (200100, )

    # All the rows are visible on the main branch
    main_cur.execute('SELECT count(*) FROM foo')
    assert main_cur.fetchone() == (400100, )

    # Check unusual and invalid LSNs for branching

    # branch at segment boundary
    env.zenith_cli.create_branch('test_branch_segment_boundary',
                                 'test_branch_behind',
                                 ancestor_start_lsn="0/3000000")
    pg = env.postgres.create_start('test_branch_segment_boundary')
    cur = pg.connect().cursor()
    cur.execute('SELECT 1')
    assert cur.fetchone() == (1, )

    # branch at pre-initdb lsn
    with pytest.raises(Exception, match="invalid branch start lsn"):
        env.zenith_cli.create_branch('test_branch_preinitdb', ancestor_start_lsn="0/42")

    # branch at pre-ancestor lsn
    with pytest.raises(Exception, match="less than timeline ancestor lsn"):
        env.zenith_cli.create_branch('test_branch_preinitdb',
                                     'test_branch_behind',
                                     ancestor_start_lsn="0/42")

    # check that we cannot create branch based on garbage collected data
    with closing(env.pageserver.connect()) as psconn:
        with psconn.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur:
            # call gc to advance latest_gc_cutoff_lsn
            pscur.execute(f"do_gc {env.initial_tenant.hex} {timeline} 0")
            row = pscur.fetchone()
            print_gc_result(row)

    with pytest.raises(Exception, match="invalid branch start lsn"):
        # this gced_lsn is pretty random, so if gc is disabled this wouldn't fail
        env.zenith_cli.create_branch('test_branch_create_fail',
                                     'test_branch_behind',
                                     ancestor_start_lsn=gced_lsn)

    # check that after gc everything is still there
    hundred_cur.execute('SELECT count(*) FROM foo')
    assert hundred_cur.fetchone() == (100, )

    more_cur.execute('SELECT count(*) FROM foo')
    assert more_cur.fetchone() == (200100, )

    main_cur.execute('SELECT count(*) FROM foo')
    assert main_cur.fetchone() == (400100, )
def test_clog_truncate(zenith_simple_env: ZenithEnv):
    """Check that a branch created after pg_xact truncation lacks segment '0000'.

    The test starts a compute node with aggressive autovacuum settings,
    consumes a large number of xids, waits until the '0000' file disappears
    from the node's pg_xact directory, then branches at the current WAL
    insert LSN and asserts that the new node's pg_xact directory does not
    contain '0000' either.
    """
    env = zenith_simple_env
    env.zenith_cli.create_branch('test_clog_truncate', 'empty')

    # set aggressive autovacuum to make sure that truncation will happen
    config = [
        'autovacuum_max_workers=10',
        'autovacuum_vacuum_threshold=0',
        'autovacuum_vacuum_insert_threshold=0',
        'autovacuum_vacuum_cost_delay=0',
        'autovacuum_vacuum_cost_limit=10000',
        'autovacuum_naptime =1s',
        'autovacuum_freeze_max_age=100000',
    ]

    pg = env.postgres.create_start('test_clog_truncate', config_lines=config)
    log.info('postgres is running on test_clog_truncate branch')

    # Install extension containing function needed for test
    pg.safe_psql('CREATE EXTENSION zenith_test_utils')

    # Consume many xids to advance clog
    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute('select test_consume_xids(1000*1000*10);')
            log.info('xids consumed')

            # call a checkpoint to trigger TruncateSubtrans
            cur.execute('CHECKPOINT;')

            # ensure WAL flush
            cur.execute('select txid_current()')
            log.info(cur.fetchone())

    # wait for autovacuum to truncate the pg_xact
    # XXX There is no timeout here: if truncation never happens, this loop
    # polls forever. Is it worth adding one?
    pg_xact_0000_path = os.path.join(pg.pg_xact_dir_path(), '0000')
    log.info(f"pg_xact_0000_path = {pg_xact_0000_path}")

    while os.path.isfile(pg_xact_0000_path):
        # Both fragments must be f-strings; otherwise the placeholder in the
        # second fragment is logged literally instead of the actual path.
        log.info(f"file exists. wait for truncation. "
                 f"pg_xact_0000_path = {pg_xact_0000_path}")
        time.sleep(5)

    # checkpoint to advance latest lsn
    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute('CHECKPOINT;')
            cur.execute('select pg_current_wal_insert_lsn()')
            lsn_after_truncation = cur.fetchone()[0]

    # create new branch after clog truncation and start a compute node on it
    log.info(f'create branch at lsn_after_truncation {lsn_after_truncation}')
    env.zenith_cli.create_branch('test_clog_truncate_new',
                                 'test_clog_truncate',
                                 ancestor_start_lsn=lsn_after_truncation)
    pg2 = env.postgres.create_start('test_clog_truncate_new')
    log.info('postgres is running on test_clog_truncate_new branch')

    # check that new node doesn't contain truncated segment
    pg_xact_0000_path_new = os.path.join(pg2.pg_xact_dir_path(), '0000')
    log.info(f"pg_xact_0000_path_new = {pg_xact_0000_path_new}")
    assert not os.path.isfile(pg_xact_0000_path_new)
def test_twophase(zenith_simple_env: ZenithEnv):
    """Exercise prepared (two-phase) transactions across a branch point.

    Four transactions are prepared on the 'test_twophase' branch. After a
    checkpoint, one of them is committed and one is rolled back, leaving two
    still prepared. A new branch is then created, and the test checks that
    the new compute node has the same set of files in its pg_twophase
    directory and that the remaining prepared transactions can be finished
    there without affecting what is visible on the original branch.
    """
    env = zenith_simple_env
    env.zenith_cli.create_branch("test_twophase", "empty")
    pg = env.postgres.create_start(
        'test_twophase', config_lines=['max_prepared_transactions=5'])
    log.info("postgres is running on 'test_twophase' branch")

    conn = pg.connect()
    cur = conn.cursor()

    cur.execute('CREATE TABLE foo (t text)')

    # Prepare four transactions, each of which will insert one row.
    # The resulting global ids are 'insert_one' .. 'insert_four'.
    for value in ('one', 'two', 'three', 'four'):
        cur.execute('BEGIN')
        cur.execute(f"INSERT INTO foo VALUES ('{value}')")
        cur.execute(f"PREPARE TRANSACTION 'insert_{value}'")

    # On checkpoint state data copied to files in
    # pg_twophase directory and fsynced
    cur.execute('CHECKPOINT')

    twophase_files = os.listdir(pg.pg_twophase_dir_path())
    log.info(twophase_files)
    assert len(twophase_files) == 4

    # Finish two of the prepared transactions; after the next checkpoint
    # only the two still-prepared ones should have state files left.
    cur.execute("COMMIT PREPARED 'insert_three'")
    cur.execute("ROLLBACK PREPARED 'insert_four'")
    cur.execute('CHECKPOINT')

    twophase_files = os.listdir(pg.pg_twophase_dir_path())
    log.info(twophase_files)
    assert len(twophase_files) == 2

    # Create a branch with the transaction in prepared state
    env.zenith_cli.create_branch("test_twophase_prepared", "test_twophase")

    # Start compute on the new branch
    pg2 = env.postgres.create_start(
        'test_twophase_prepared',
        config_lines=['max_prepared_transactions=5'],
    )

    # Check that we restored only needed twophase files.
    # Compare sorted copies: list.sort() returns None, so comparing the
    # results of .sort() would always pass regardless of the contents.
    twophase_files2 = os.listdir(pg2.pg_twophase_dir_path())
    log.info(twophase_files2)
    assert sorted(twophase_files2) == sorted(twophase_files)

    conn2 = pg2.connect()
    cur2 = conn2.cursor()

    # On the new branch, commit one of the prepared transactions,
    # abort the other one.
    cur2.execute("COMMIT PREPARED 'insert_one'")
    cur2.execute("ROLLBACK PREPARED 'insert_two'")

    cur2.execute('SELECT * FROM foo')
    assert cur2.fetchall() == [('one', ), ('three', )]

    # Only one committed insert is visible on the original branch
    cur.execute('SELECT * FROM foo')
    assert cur.fetchall() == [('three', )]