def workflow_test_drop_default_cluster(c: Composition) -> None:
    """Test that the default cluster can be dropped."""
    c.down(destroy_volumes=True)

    c.up("materialized")
    c.wait_for_materialized()

    c.sql("DROP CLUSTER default CASCADE")
    c.sql("CREATE CLUSTER default REPLICAS (default (SIZE '1'))")
def run_test(c: Composition, disruption: Disruption, id: int) -> None:
    print(f"+++ Running disruption scenario {disruption.name}")

    c.up("testdrive", persistent=True)

    nodes = [
        Computed(
            name="computed_1_1",
            peers=["computed_1_1", "computed_1_2"],
        ),
        Computed(
            name="computed_1_2",
            peers=["computed_1_1", "computed_1_2"],
        ),
        Computed(
            name="computed_2_1",
            peers=["computed_2_1", "computed_2_2"],
        ),
        Computed(
            name="computed_2_2",
            peers=["computed_2_1", "computed_2_2"],
        ),
    ]

    with c.override(*nodes):
        c.up("materialized", *[n.name for n in nodes])
        c.wait_for_materialized()

        c.sql(
            """
            CREATE CLUSTER cluster1 REPLICAS (
                replica1 (REMOTE ['computed_1_1:2100', 'computed_1_2:2100']),
                replica2 (REMOTE ['computed_2_1:2100', 'computed_2_2:2100'])
            )
            """
        )

        with c.override(
            Testdrive(
                validate_data_dir=False,
                no_reset=True,
                materialize_params={"cluster": "cluster1"},
                seed=id,
            )
        ):
            populate(c)

            # Disrupt replica1 by some means
            disruption.disruption(c)

            validate(c)

    cleanup_list = ["materialized", "testdrive", *[n.name for n in nodes]]
    c.kill(*cleanup_list)
    c.rm(*cleanup_list, destroy_volumes=True)

    c.rm_volumes("mzdata", "pgdata")
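# A minimal sketch of the scaffolding `run_test` assumes: a `Disruption` pairs a
# scenario name with a callable that perturbs the composition. The dataclass
# shape, the `kill_first_replica` helper, and the driver workflow below are
# assumptions for illustration, not taken from the suite itself.
from dataclasses import dataclass
from typing import Callable


@dataclass
class Disruption:
    name: str
    disruption: Callable[[Composition], None]


def kill_first_replica(c: Composition) -> None:
    # Hypothetical disruption: take down both nodes backing replica1.
    c.kill("computed_1_1", "computed_1_2")


disruptions = [Disruption(name="kill-replica", disruption=kill_first_replica)]


def workflow_disruptions(c: Composition) -> None:
    # Run each scenario with a distinct testdrive seed.
    for id, disruption in enumerate(disruptions):
        run_test(c, disruption, id)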
def workflow_cluster(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run all the limits tests against a multi-node, multi-replica cluster."""
    parser.add_argument(
        "--scenario", metavar="SCENARIO", type=str, help="Scenario to run."
    )
    parser.add_argument(
        "--workers",
        type=int,
        metavar="N",
        default=2,
        help="set the default number of workers",
    )
    args = parser.parse_args()

    c.start_and_wait_for_tcp(services=["zookeeper", "kafka", "schema-registry"])
    c.up("materialized")
    c.wait_for_materialized()

    nodes = [
        Computed(
            name="computed_1_1",
            workers=args.workers,
            peers=["computed_1_1", "computed_1_2"],
        ),
        Computed(
            name="computed_1_2",
            workers=args.workers,
            peers=["computed_1_1", "computed_1_2"],
        ),
        Computed(
            name="computed_2_1",
            workers=args.workers,
            peers=["computed_2_1", "computed_2_2"],
        ),
        Computed(
            name="computed_2_2",
            workers=args.workers,
            peers=["computed_2_1", "computed_2_2"],
        ),
    ]

    with c.override(*nodes):
        c.up(*[n.name for n in nodes])

        c.sql(
            """
            CREATE CLUSTER cluster1 REPLICAS (
                replica1 (REMOTE ['computed_1_1:2100', 'computed_1_2:2100']),
                replica2 (REMOTE ['computed_2_1:2100', 'computed_2_2:2100'])
            )
            """
        )

        run_test(c, args)
def start_services(
    c: Composition, args: argparse.Namespace, instance: str
) -> List[Service]:
    tag, options, nodes, workers = (
        (args.this_tag, args.this_options, args.this_nodes, args.this_workers)
        if instance == "this"
        else (args.other_tag, args.other_options, args.other_nodes, args.other_workers)
    )

    cluster_services: List[Service] = []

    if nodes:
        cluster_services.append(
            Materialized(
                image=f"materialize/materialized:{tag}" if tag else None,
            )
        )

        node_names = [f"computed_{n}" for n in range(0, nodes)]
        for node_id in range(0, nodes):
            cluster_services.append(
                Computed(
                    name=node_names[node_id],
                    workers=workers,
                    options=options,
                    peers=node_names,
                    image=f"materialize/computed:{tag}" if tag else None,
                )
            )
    else:
        cluster_services.append(
            Materialized(
                image=f"materialize/materialized:{tag}" if tag else None,
                workers=workers,
                options=options,
            )
        )

    with c.override(*cluster_services):
        print(f"The version of the '{instance.upper()}' Mz instance is:")
        c.run("materialized", "--version")

        # Single-binary legacy Mz instances only have port 6875 open,
        # so only check that port before proceeding.
        c.up("materialized")
        c.wait_for_materialized(port=6875)

        if nodes:
            print(f"Starting cluster for '{instance.upper()}' ...")
            c.up(*[f"computed_{n}" for n in range(0, nodes)])

            c.sql(
                "CREATE CLUSTER REPLICA default.feature_benchmark REMOTE ["
                + ",".join([f"'computed_{n}:2100'" for n in range(0, nodes)])
                + "];"
            )
            c.sql("DROP CLUSTER REPLICA default.default_replica")

        c.up("testdrive", persistent=True)

    return cluster_services
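# A minimal sketch of how `start_services` might be driven for a two-instance
# comparison. The workflow name and the teardown via c.kill/c.rm are
# assumptions mirroring the cleanup style used elsewhere in these workflows.
def workflow_compare_instances(c: Composition, args: argparse.Namespace) -> None:
    for instance in ["this", "other"]:
        services = start_services(c, args, instance)
        # ... run the benchmark against this instance here ...
        c.kill(*[s.name for s in services])
        c.rm(*[s.name for s in services], destroy_volumes=True)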
def test_cluster(c: Composition, *glob: str) -> None:
    c.up("materialized")
    c.wait_for_materialized()

    # Create a remote cluster and verify that tests pass.
    c.up("computed_1")
    c.up("computed_2")
    c.sql("DROP CLUSTER IF EXISTS cluster1 CASCADE;")
    c.sql(
        "CREATE CLUSTER cluster1 REPLICAS (replica1 (REMOTE ['computed_1:2100', 'computed_2:2100']));"
    )
    c.run("testdrive", *glob)

    # Add a replica to that remote cluster and verify that tests still pass.
    c.up("computed_3")
    c.up("computed_4")
    c.sql(
        "CREATE CLUSTER REPLICA cluster1.replica2 REMOTE ['computed_3:2100', 'computed_4:2100']"
    )
    c.run("testdrive", *glob)

    # Kill one of the nodes in the first replica of the compute cluster and
    # verify that tests still pass.
    c.kill("computed_1")
    c.run("testdrive", *glob)

    # Leave only replica 2 up and verify that tests still pass.
    c.sql("DROP CLUSTER REPLICA cluster1.replica1")
    c.run("testdrive", *glob)
def setup(c: Composition) -> None:
    c.up("testdrive", persistent=True)

    c.start_and_wait_for_tcp(
        services=["redpanda", "postgres-backend", "postgres-source", "debezium"]
    )
    for postgres in ["postgres-backend", "postgres-source"]:
        c.wait_for_postgres(service=postgres)

    c.sql(
        sql="""
            CREATE SCHEMA IF NOT EXISTS consensus;
            CREATE SCHEMA IF NOT EXISTS storage;
            CREATE SCHEMA IF NOT EXISTS adapter;
        """,
        service="postgres-backend",
        user="******",
        password="******",
    )
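# A minimal sketch of how one of the schemas created above might be wired into
# the Materialized service. Only the --adapter-stash-url flag is confirmed
# elsewhere in this suite (see workflow_stash); the credentials placeholder and
# the `options=--search_path=...` URL form are assumptions for illustration,
# and the consensus/storage schemas would be pointed at analogously.
materialized_with_external_metadata = Materialized(
    options=[
        "--adapter-stash-url",
        "postgres://******:******@postgres-backend?options=--search_path=adapter",
    ],
)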
def workflow_audit_log(c: Composition) -> None:
    c.up("materialized")
    c.wait_for_materialized(service="materialized")

    # Create some audit log entries.
    c.sql("CREATE TABLE t (i INT)")
    c.sql("CREATE DEFAULT INDEX ON t")

    log = c.sql_query("SELECT * FROM mz_audit_events ORDER BY id")

    # Restart mz.
    c.kill("materialized")
    c.up("materialized")
    c.wait_for_materialized()

    # Verify the audit log entries are still present and have not changed.
    restart_log = c.sql_query("SELECT * FROM mz_audit_events ORDER BY id")
    if log != restart_log:
        print("initial audit log:", log)
        print("audit log after restart:", restart_log)
        raise Exception("audit logs not equal after restart")
def workflow_test_github_13603(c: Composition) -> None:
    """Test that multi-worker replicas terminate eagerly upon rehydration."""
    c.down(destroy_volumes=True)
    c.up("materialized")
    c.wait_for_materialized()

    c.up("computed_1")
    c.up("computed_2")
    c.sql(
        "CREATE CLUSTER cluster1 REPLICAS (replica1 (REMOTE ['computed_1:2100', 'computed_2:2100']));"
    )

    c.kill("materialized")
    c.up("materialized")
    c.wait_for_materialized()

    # Ensure that both computeds crashed (panicked) during rehydration.
    c1 = c.invoke("logs", "computed_1", capture=True)
    assert "panicked" in c1.stdout
    c2 = c.invoke("logs", "computed_2", capture=True)
    assert "panicked" in c2.stdout
def test_github_12251(c: Composition) -> None:
    c.up("materialized")
    c.wait_for_materialized()
    c.up("computed_1")
    c.sql(
        """
        DROP CLUSTER IF EXISTS cluster1 CASCADE;
        CREATE CLUSTER cluster1 REPLICAS (replica1 (REMOTE ['computed_1:2100']));
        SET cluster = cluster1;
        """
    )
    start_time = time.process_time()
    try:
        c.sql(
            """
            SET statement_timeout = '1 s';
            CREATE TABLE IF NOT EXISTS log_table (f1 TEXT);
            CREATE TABLE IF NOT EXISTS panic_table (f1 TEXT);
            INSERT INTO panic_table VALUES ('panic!');
            -- Crash loop the cluster with the table's index
            INSERT INTO log_table SELECT mz_internal.mz_panic(f1) FROM panic_table;
            """
        )
    except ProgrammingError as e:
        # Ensure we received the correct error message; pg8000 surfaces the
        # server's error fields as a dict, with "M" holding the message text.
        assert "statement timeout" in e.args[0]["M"], e
        # Ensure the statement_timeout setting is ~honored.
        assert (
            time.process_time() - start_time < 2
        ), "statement_timeout not respected"
    else:
        assert False, "unexpected success in test_github_12251"

    # Ensure we can select from tables after cancellation.
    c.sql("SELECT * FROM log_table;")
def workflow_test_cluster(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run testdrive in a variety of compute cluster configurations."""
    parser.add_argument(
        "glob",
        nargs="*",
        default=["smoke/*.td"],
        help="run against the specified files",
    )
    args = parser.parse_args()

    c.down(destroy_volumes=True)
    c.start_and_wait_for_tcp(
        services=["zookeeper", "kafka", "schema-registry", "localstack"]
    )
    c.up("materialized")
    c.wait_for_materialized()

    # Create a remote cluster and verify that tests pass.
    c.up("computed_1")
    c.up("computed_2")
    c.sql("DROP CLUSTER IF EXISTS cluster1 CASCADE;")
    c.sql(
        "CREATE CLUSTER cluster1 REPLICAS (replica1 (REMOTE ['computed_1:2100', 'computed_2:2100']));"
    )
    c.run("testdrive", *args.glob)

    # Add a replica to that remote cluster and verify that tests still pass.
    c.up("computed_3")
    c.up("computed_4")
    c.sql(
        "CREATE CLUSTER REPLICA cluster1.replica2 REMOTE ['computed_3:2100', 'computed_4:2100']"
    )
    c.run("testdrive", *args.glob)

    # Kill one of the nodes in the first replica of the compute cluster and
    # verify that tests still pass.
    c.kill("computed_1")
    c.run("testdrive", *args.glob)

    # Leave only replica 2 up and verify that tests still pass.
    c.sql("DROP CLUSTER REPLICA cluster1.replica1")
    c.run("testdrive", *args.glob)
def workflow_stash(c: Composition) -> None:
    c.rm(
        "testdrive",
        "materialized",
        stop=True,
        destroy_volumes=True,
    )
    c.rm_volumes("mzdata", "pgdata", force=True)

    materialized = Materialized(
        options=["--adapter-stash-url", "postgres://*****:*****@postgres"],
    )
    postgres = Postgres(image="postgres:14.4")

    with c.override(materialized, postgres):
        c.up("postgres")
        c.wait_for_postgres()
        c.start_and_wait_for_tcp(services=["materialized"])
        c.wait_for_materialized("materialized")

        c.sql("CREATE TABLE a (i INT)")

        c.stop("postgres")
        c.up("postgres")
        c.wait_for_postgres()

        c.sql("CREATE TABLE b (i INT)")

        c.rm("postgres", stop=True, destroy_volumes=True)
        c.up("postgres")
        c.wait_for_postgres()

        # Postgres cleared its database, so this should fail.
        try:
            c.sql("CREATE TABLE c (i INT)")
            raise Exception("expected unreachable")
        except Exception as e:
            # Depending on timing, either of these errors can occur. The stash
            # error comes from the stash complaining. The network error comes
            # from pg8000 complaining because materialize panicked.
            if "stash error: postgres: db error" not in str(
                e
            ) and "network error" not in str(e):
                raise e
def workflow_default(c: Composition) -> None:
    """Streams data from Wikipedia to a browser visualization."""
    c.up("server")
    c.wait_for_materialized()
    c.sql((Path(__file__).parent / "views.sql").read_text())
def execute(self, c: Composition) -> None:
    # Replace the default replica with a remote replica backed by computed_1.
    c.sql(
        """
        DROP CLUSTER REPLICA default.default_replica;
        CREATE CLUSTER REPLICA default.default_replica REMOTE ['computed_1:2100'];
        """
    )
def execute(self, c: Composition) -> None:
    # Set a system configuration parameter as the system user
    # via the internal SQL port (6877).
    c.sql(
        f"ALTER SYSTEM SET {self.config_param} TO {self.value}",
        user="******",
        port=6877,
    )
def execute(self, c: Composition) -> None:
    # Replace the default replica with a managed replica of size '1'.
    c.sql(
        """
        DROP CLUSTER REPLICA default.default_replica;
        CREATE CLUSTER REPLICA default.default_replica SIZE '1';
        """
    )
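# A minimal sketch of the shape these `execute` methods plausibly share: small
# action objects applied to a running composition in sequence. The `Action`
# base class and the `run_actions` driver are assumptions for illustration.
class Action:
    def execute(self, c: Composition) -> None:
        raise NotImplementedError


def run_actions(c: Composition, actions: List[Action]) -> None:
    # Apply each action (e.g. swapping the default replica, or setting a
    # system parameter) in order against the composition.
    for action in actions:
        action.execute(c)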
def workflow_instance_size(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Create multiple clusters with multiple nodes and replicas each."""
    c.start_and_wait_for_tcp(services=["zookeeper", "kafka", "schema-registry"])

    parser.add_argument(
        "--workers",
        type=int,
        metavar="N",
        default=2,
        help="set the default number of workers",
    )
    parser.add_argument(
        "--clusters",
        type=int,
        metavar="N",
        default=16,
        help="set the number of clusters to create",
    )
    parser.add_argument(
        "--nodes",
        type=int,
        metavar="N",
        default=4,
        help="set the number of nodes per cluster",
    )
    parser.add_argument(
        "--replicas",
        type=int,
        metavar="N",
        default=4,
        help="set the number of replicas per cluster",
    )
    args = parser.parse_args()

    c.up("testdrive", persistent=True)
    c.up("materialized")
    c.wait_for_materialized()

    # Construct the required Computed instances and peer them into clusters
    computeds = []
    for cluster_id in range(0, args.clusters):
        for replica_id in range(0, args.replicas):
            nodes = []
            for node_id in range(0, args.nodes):
                node_name = f"computed_{cluster_id}_{replica_id}_{node_id}"
                nodes.append(node_name)

            for node_id in range(0, args.nodes):
                computeds.append(
                    Computed(name=nodes[node_id], peers=nodes, workers=args.workers)
                )

    with c.override(*computeds):
        with c.override(Testdrive(seed=1, no_reset=True)):
            for n in computeds:
                c.up(n.name)

            # Create some input data
            c.testdrive(
                dedent(
                    """
                    > CREATE TABLE ten (f1 INTEGER);
                    > INSERT INTO ten VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);

                    $ set schema={
                        "type" : "record",
                        "name" : "test",
                        "fields" : [
                            {"name":"f1", "type":"string"}
                        ]
                      }

                    $ kafka-create-topic topic=instance-size

                    $ kafka-ingest format=avro topic=instance-size schema=${schema} publish=true repeat=10000
                    {"f1": "fish"}
                    """
                )
            )

            # Construct the required CREATE CLUSTER statements
            for cluster_id in range(0, args.clusters):
                replica_definitions = []
                for replica_id in range(0, args.replicas):
                    nodes = []
                    for node_id in range(0, args.nodes):
                        node_name = f"computed_{cluster_id}_{replica_id}_{node_id}"
                        nodes.append(node_name)

                    replica_name = f"replica_{cluster_id}_{replica_id}"
                    replica_definitions.append(
                        f"{replica_name} (REMOTE ["
                        + ", ".join(f"'{n}:2100'" for n in nodes)
                        + "])"
                    )

                c.sql(
                    f"CREATE CLUSTER cluster_{cluster_id} REPLICAS ("
                    + ",".join(replica_definitions)
                    + ")"
                )

            # Construct some dataflows in each cluster
            for cluster_id in range(0, args.clusters):
                cluster_name = f"cluster_{cluster_id}"

                c.testdrive(
                    dedent(
                        f"""
                        > SET cluster={cluster_name}

                        > CREATE DEFAULT INDEX ON ten;

                        > CREATE MATERIALIZED VIEW v_{cluster_name} AS
                          SELECT COUNT(*) AS c1 FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4;

                        > CREATE MATERIALIZED SOURCE s_{cluster_name}
                          FROM KAFKA BROKER '${{testdrive.kafka-addr}}'
                          TOPIC 'testdrive-instance-size-${{testdrive.seed}}'
                          FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY '${{testdrive.schema-registry-url}}'
                          ENVELOPE NONE
                        """
                    )
                )

            # Validate that each individual cluster is operating properly
            for cluster_id in range(0, args.clusters):
                cluster_name = f"cluster_{cluster_id}"

                c.testdrive(
                    dedent(
                        f"""
                        > SET cluster={cluster_name}

                        > SELECT c1 FROM v_{cluster_name};
                        10000

                        > SELECT COUNT(*) FROM s_{cluster_name}
                        10000
                        """
                    )
                )
def workflow_default(c: Composition) -> None:
    c.start_and_wait_for_tcp(services=["materialized"])
    c.wait_for_materialized("materialized")

    # Ensure that the secrets directory has restricted permissions
    c.exec(
        "materialized",
        "bash",
        "-c",
        "[[ `stat -c \"%a\" /mzdata/secrets` == '700' ]] && exit 0 || exit 1",
    )

    c.sql("CREATE SECRET secret AS 's3cret'")

    # Check that the contents of the secret have made it to the storage
    c.exec(
        "materialized",
        "bash",
        "-c",
        "[[ `cat /mzdata/secrets/*` == 's3cret' ]] && exit 0 || exit 1",
    )
    # Check that the file permissions are restrictive
    c.exec(
        "materialized",
        "bash",
        "-c",
        "[[ `stat -c \"%a\" /mzdata/secrets/*` == '600' ]] && exit 0 || exit 1",
    )

    # Check that ALTER SECRET gets reflected on disk
    c.sql("ALTER SECRET secret AS 'tops3cret'")
    c.exec(
        "materialized",
        "bash",
        "-c",
        "[[ `cat /mzdata/secrets/*` == 'tops3cret' ]] && exit 0 || exit 1",
    )
    # Check that replacing the file did not change its permissions
    c.exec(
        "materialized",
        "bash",
        "-c",
        "[[ `stat -c \"%a\" /mzdata/secrets/*` == '600' ]] && exit 0 || exit 1",
    )

    # A rename should not change the contents on disk
    c.sql("ALTER SECRET secret RENAME TO renamed_secret")

    # Check that the contents of the secret have made it to the storage
    c.exec(
        "materialized",
        "bash",
        "-c",
        "[[ `cat /mzdata/secrets/*` == 'tops3cret' ]] && exit 0 || exit 1",
    )

    c.sql("DROP SECRET renamed_secret")

    # Check that the file has been deleted from the storage
    c.exec(
        "materialized",
        "bash",
        "-c",
        "[[ -z `ls -A /mzdata/secrets` ]] && exit 0 || exit 1",
    )