def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: parser.add_argument("--message-count", type=int, default=1000) parser.add_argument("--partitions", type=int, default=1) parser.add_argument("--check-sink", action="store_true") parser.add_argument( "--redpanda", action="store_true", help="run against Redpanda instead of the Confluent Platform", ) args = parser.parse_args() dependencies = ["materialized"] if args.redpanda: dependencies += ["redpanda"] else: dependencies += ["zookeeper", "kafka", "schema-registry"] c.start_and_wait_for_tcp(services=dependencies) c.run( "billing-demo", "--materialized-host=materialized", "--kafka-host=kafka", "--schema-registry-url=http://schema-registry:8081", "--create-topic", "--replication-factor=1", f"--message-count={args.message_count}", f"--partitions={args.partitions}", *(["--check-sink"] if args.check_sink else []), )
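# The Redpanda-vs-Confluent branch above recurs in several workflows in this
# section. A minimal sketch of how it could be factored out; the helper name
# `kafka_dependencies` is hypothetical and not part of the original code:


def kafka_dependencies(redpanda: bool) -> list[str]:
    """Return the broker services to start: Redpanda, or the Confluent stack."""
    if redpanda:
        return ["redpanda"]
    return ["zookeeper", "kafka", "schema-registry"]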
def workflow_default(c: Composition) -> None: c.up("test-certs") c.start_and_wait_for_tcp( services=["zookeeper", "kafka", "schema-registry"]) c.up("materialized") c.wait_for_materialized() c.run("testdrive", "*.td")
def workflow_default(c: Composition) -> None:
    c.start_and_wait_for_tcp(
        services=["zookeeper", "kafka", "schema-registry", "materialized", "toxiproxy"]
    )
    c.wait_for_materialized()

    seed = random.getrandbits(16)
    for i, failure_mode in enumerate(
        [
            "toxiproxy-close-connection.td",
            "toxiproxy-timeout.td",
        ]
    ):
        c.run(
            "testdrive-svc",
            "--no-reset",
            "--max-errors=1",
            f"--seed={seed}{i}",
            f"--temp-dir=/share/tmp/kafka-resumption-{seed}{i}",
            "setup.td",
            failure_mode,
            "during.td",
            "sleep.td",
            "toxiproxy-restore-connection.td",
            "verify-success.td",
            "cleanup.td",
        )
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: parser.add_argument( "--seed", help="an alternate seed to use to avoid clashing with existing topics", type=int, default=1, ) args = parser.parse_args() c.start_and_wait_for_tcp( services=["zookeeper", "kafka", "schema-registry", "materialized"]) c.run( "testdrive", f"--seed={args.seed}", "--kafka-option=group.id=group1", "before-restart.td", ) c.kill("materialized") c.up("materialized") c.wait_for_materialized() c.run( "testdrive", f"--seed={args.seed}", "--no-reset", "--kafka-option=group.id=group2", "after-restart.td", )
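# The kill/up/wait sequence above is the restart idiom that several workflows
# below repeat (see the Kafka sources and user-index workflows). A sketch of a
# reusable helper, assuming only the Composition methods already used in this
# file; the helper name is hypothetical:


def restart_materialized(c: Composition) -> None:
    """Stop materialized, start it again, and block until it accepts connections."""
    c.kill("materialized")
    c.up("materialized")
    c.wait_for_materialized()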
def workflow_upgrade(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Test upgrades from various versions."""
    parser.add_argument(
        "--min-version",
        metavar="VERSION",
        type=Version.parse,
        default=Version.parse("0.8.0"),
        help="the minimum version to test from",
    )
    parser.add_argument(
        "--most-recent",
        metavar="N",
        type=int,
        help="limit testing to the N most recent versions",
    )
    parser.add_argument(
        "filter",
        nargs="?",
        default="*",
        help="limit to only the files matching filter",
    )
    args = parser.parse_args()

    tested_versions = [v for v in all_versions if v >= args.min_version]
    if args.most_recent is not None:
        tested_versions = tested_versions[: args.most_recent]
    tested_versions.reverse()

    c.start_and_wait_for_tcp(
        services=["zookeeper", "kafka", "schema-registry", "postgres"]
    )

    for version in tested_versions:
        priors = [f"v{v}" for v in all_versions if v < version]
        test_upgrade_from_version(c, f"v{version}", priors, filter=args.filter)

    test_upgrade_from_version(c, "current_source", priors=["*"], filter=args.filter)
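# To make the version filtering above concrete: the `[: args.most_recent]`
# slice followed by `reverse()` implies that `all_versions` is sorted newest
# first, so the slice keeps the N most recent versions and the reverse makes
# the upgrades run oldest-to-newest. A self-contained illustration, using
# plain tuples in place of the real Version type (an assumption for brevity):

all_versions_example = [(0, 9, 2), (0, 9, 1), (0, 8, 0), (0, 7, 0)]  # newest first
min_version_example = (0, 8, 0)
tested = [v for v in all_versions_example if v >= min_version_example]
tested = tested[:2]  # --most-recent 2 -> [(0, 9, 2), (0, 9, 1)]
tested.reverse()  # run oldest first -> [(0, 9, 1), (0, 9, 2)]
assert tested == [(0, 9, 1), (0, 9, 2)]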
def workflow_pg_snapshot_resumption(c: Composition) -> None:
    """Test that a Postgres source snapshot resumes after storaged crashes mid-snapshot."""
    c.down(destroy_volumes=True)

    with c.override(
        # Start postgres for the pg source.
        Postgres(),
        Testdrive(no_reset=True),
        Storaged(environment=["FAILPOINTS=pg_snapshot_failure=return"]),
    ):
        dependencies = [
            "materialized",
            "postgres",
            "storaged",
        ]
        c.start_and_wait_for_tcp(services=dependencies)

        c.run("testdrive", "pg-snapshot-resumption/01-configure-postgres.td")
        c.run("testdrive", "pg-snapshot-resumption/02-create-sources.td")

        # storaged should crash
        c.run("testdrive", "pg-snapshot-resumption/03-while-storaged-down.td")

        print("Sleeping to ensure that storaged crashes")
        time.sleep(10)

        with c.override(
            # Turn off the failpoint.
            Storaged()
        ):
            c.start_and_wait_for_tcp(services=["storaged"])
            c.run("testdrive", "pg-snapshot-resumption/04-verify-data.td")
def test_testdrive(c: Composition, mz: Materialized, aws: str, tests: str) -> None:
    c.start_and_wait_for_tcp(
        services=["zookeeper", "kafka", "schema-registry", mz.name]
    )
    c.wait_for_materialized(mz.name)
    c.run("testdrive-svc", aws, tests)
    c.kill(mz.name)
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: """Run the proxy tests.""" parser.add_argument( "--redpanda", action="store_true", help="run against Redpanda instead of the Confluent Platform", ) parser.add_argument( "--aws-region", help="run against the specified AWS region instead of localstack", ) args = parser.parse_args() dependencies = ["squid"] if args.redpanda: dependencies += ["redpanda"] else: dependencies += ["zookeeper", "kafka", "schema-registry"] if not args.aws_region: dependencies += ["localstack"] c.start_and_wait_for_tcp(dependencies) aws_arg = (f"--aws-region={args.aws_region}" if args.aws_region else "--aws-endpoint=http://localstack:4566") for test_case in test_cases: print(f"Running test case {test_case.name!r}") with c.override(Materialized(environment_extra=test_case.env)): c.up("materialized") c.wait_for_materialized("materialized") c.run("testdrive-svc", aws_arg, *test_case.files)
def workflow_kafka_sources(c: Composition) -> None:
    seed = round(time.time())

    c.start_and_wait_for_tcp(services=prerequisites)
    c.up("materialized")
    c.wait_for_materialized("materialized")

    c.run("testdrive-svc", f"--seed={seed}", f"kafka-sources/*{td_test}*-before.td")

    c.kill("materialized")
    c.up("materialized")
    c.wait_for_materialized("materialized")

    # And restart again, for extra stress
    c.kill("materialized")
    c.up("materialized")
    c.wait_for_materialized("materialized")

    c.run("testdrive-svc", f"--seed={seed}", f"kafka-sources/*{td_test}*-after.td")

    # Do one more restart, just in case, to confirm that Mz is able to come up
    c.kill("materialized")
    c.up("materialized")
    c.wait_for_materialized("materialized")

    c.kill("materialized")
    c.rm("materialized", "testdrive-svc", destroy_volumes=True)
    c.rm_volumes("mzdata")
def workflow_disable_user_indexes(c: Composition) -> None:
    seed = round(time.time())

    c.start_and_wait_for_tcp(services=prerequisites)
    c.up("materialized")
    c.wait_for_materialized()

    c.run("testdrive-svc", f"--seed={seed}", "disable-user-indexes/before.td")

    c.kill("materialized")

    with c.override(Materialized(options=f"{mz_options} --disable-user-indexes")):
        c.up("materialized")
        c.wait_for_materialized()

        c.run("testdrive-svc", f"--seed={seed}", "disable-user-indexes/after.td")

        c.kill("materialized")

    c.rm("materialized", "testdrive-svc", destroy_volumes=True)
    c.rm_volumes("mzdata")
def workflow_default(c: Composition) -> None: c.start_and_wait_for_tcp(["zookeeper", "kafka", "schema-registry"]) c.run("ci-cargo-test", "run-tests") token = os.environ["BUILDKITE_TEST_ANALYTICS_API_KEY_CARGO_TEST"] if len(token) < 1: print("Analytics API key empty, skipping junit reporting") return with open(f"{ROOT.as_posix()}/results.json") as f: junit_xml = spawn.capture(args=["cargo2junit"], stdin=f.read()) requests.post( "https://analytics-api.buildkite.com/v1/uploads", headers={"Authorization": f"Token {token}"}, json={ "format": "junit", "run_env": { "key": os.environ["BUILDKITE_BUILD_ID"], "CI": "buildkite", "number": os.environ["BUILDKITE_BUILD_NUMBER"], "job_id": os.environ["BUILDKITE_JOB_ID"], "branch": os.environ["BUILDKITE_BRANCH"], "commit_sha": os.environ["BUILDKITE_COMMIT"], "message": os.environ["BUILDKITE_MESSAGE"], "url": os.environ["BUILDKITE_BUILD_URL"], }, "data": junit_xml, }, )
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: parser.add_argument( "--scenario", metavar="SCENARIO", type=str, help="Scenario to run", required=True, ) parser.add_argument("--seed", metavar="N", type=int, help="Random seed", default=1) args = parser.parse_args() scenario_class = globals()[args.scenario] c.start_and_wait_for_tcp( services=["zookeeper", "kafka", "schema-registry"]) c.up("testdrive", persistent=True) random.seed(args.seed) print("Generating test...") test = Test(scenario=scenario_class(), max_actions=500) print("Running test...") test.run(c)
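# `globals()[args.scenario]` requires the named scenario class to exist at
# module scope and raises a bare KeyError otherwise. A slightly more defensive
# variant; the function name and error message are illustrative, not from the
# original:


def resolve_scenario(name: str) -> type:
    cls = globals().get(name)
    if cls is None or not isinstance(cls, type):
        raise ValueError(f"unknown scenario: {name}")
    return cls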
def workflow_default(c: Composition) -> None: c.start_and_wait_for_tcp(services=["localstack"]) for version in CONFLUENT_PLATFORM_VERSIONS: print(f"==> Testing Confluent Platform {version}") confluent_platform_services = [ Zookeeper(tag=version), Kafka(tag=version), SchemaRegistry(tag=version), ] with c.override(*confluent_platform_services): c.start_and_wait_for_tcp(services=[ "zookeeper", "kafka", "schema-registry", "materialized" ]) c.wait_for_materialized() c.run("testdrive", "kafka-matrix.td", "testdrive/kafka-*.td") c.kill( "zookeeper", "kafka", "schema-registry", "materialized", ) c.rm( "zookeeper", "kafka", "schema-registry", "materialized", "testdrive", destroy_volumes=True, ) c.rm_volumes("mzdata", "pgdata", force=True)
def workflow_test_upsert(c: Composition) -> None:
    """Test creating upsert sources and continuing to ingest them after a restart."""
    with c.override(
        Testdrive(default_timeout="30s", no_reset=True, consistent_seed=True),
    ):
        c.down(destroy_volumes=True)
        dependencies = [
            "materialized",
            "zookeeper",
            "kafka",
            "schema-registry",
        ]
        c.start_and_wait_for_tcp(services=dependencies)

        c.run("testdrive", "upsert/01-create-sources.td")

        # Sleep to make sure the errors have made it to persist. This isn't
        # necessary for correctness, as we should be able to crash at any
        # point and restart. But if we don't sleep here, we might still be
        # ingesting the errored records in the new process, and so we wouldn't
        # actually be testing the ability to retract error values that made it
        # to persist.
        print("Sleeping for ten seconds")
        time.sleep(10)
        c.exec("materialized", "bash", "-c", "kill -9 `pidof storaged`")
        c.run("testdrive", "upsert/02-after-storaged-restart.td")
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: """Run testdrive.""" parser.add_argument( "--redpanda", action="store_true", help="run against Redpanda instead of the Confluent Platform", ) parser.add_argument( "--aws-region", help="run against the specified AWS region instead of localstack", ) parser.add_argument( "--workers", type=int, metavar="N", help="set the number of materialized dataflow workers", ) parser.add_argument( "--persistent-user-tables", action="store_true", help="enable the --persistent-user-tables materialized option", ) parser.add_argument( "files", nargs="*", default=["*.td", "esoteric/*.td"], help="run against the specified files", ) args = parser.parse_args() if not args.redpanda and Arch.host() == Arch.AARCH64: ui.warn( "Running the Confluent Platform in Docker on ARM-based machines is " "nearly unusably slow. Consider using Redpanda instead (--redpanda) " "or running tests without mzcompose.") dependencies = ["materialized"] if args.redpanda: dependencies += ["redpanda"] else: dependencies += ["zookeeper", "kafka", "schema-registry"] materialized = Materialized( workers=args.workers, options=["--persistent-user-tables"] if args.persistent_user_tables else [], ) testdrive = Testdrive( forward_buildkite_shard=True, entrypoint_extra=[f"--aws-region={args.aws_region}"] if args.aws_region else ["--aws-endpoint=http://localstack:4566"], ) with c.override(materialized, testdrive): c.start_and_wait_for_tcp(services=dependencies) c.wait_for_materialized("materialized") c.run("testdrive-svc", *args.files) c.kill("materialized")
def workflow_testdrive(c: Composition) -> None:
    c.start_and_wait_for_tcp(
        services=[
            "zookeeper",
            "kafka",
            "schema-registry",
            "materialized",
        ]
    )
    c.run("testdrive-svc", tests)
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: """Run testdrive.""" parser.add_argument( "--redpanda", action="store_true", help="run against Redpanda instead of the Confluent Platform", ) parser.add_argument( "--aws-region", help="run against the specified AWS region instead of localstack", ) parser.add_argument( "--kafka-default-partitions", type=int, metavar="N", help="set the default number of kafka partitions per topic", ) parser.add_argument( "files", nargs="*", default=["*.td"], help="run against the specified files", ) args = parser.parse_args() if not args.redpanda and Arch.host() == Arch.AARCH64: ui.warn( "Running the Confluent Platform in Docker on ARM-based machines is " "nearly unusably slow. Consider using Redpanda instead (--redpanda) " "or running tests without mzcompose." ) dependencies = ["materialized"] if args.redpanda: dependencies += ["redpanda"] else: dependencies += ["zookeeper", "kafka", "schema-registry"] if args.aws_region is None: dependencies += ["localstack"] testdrive = Testdrive( forward_buildkite_shard=True, kafka_default_partitions=args.kafka_default_partitions, aws_region=args.aws_region, validate_postgres_stash=True, ) with c.override(testdrive): c.start_and_wait_for_tcp(services=dependencies) c.wait_for_materialized("materialized") try: junit_report = ci_util.junit_report_filename(c.name) c.run("testdrive", f"--junit-report={junit_report}", *args.files) finally: ci_util.upload_junit_report( "testdrive", Path(__file__).parent / junit_report )
def workflow_default(c: Composition) -> None: c.start_and_wait_for_tcp( services=["localstack", "materialized", "toxiproxy"]) c.wait_for_materialized() # For different values of bytes_allowed, the following happens: # 0 - Connection is dropped immediately # 1K - SQS queue and bucket listing are both prevented # 2K - SQS and key fetching are both prevented # 10K - only key fetching is prevented for toxiproxy_bytes_allowed in [ 0, 256, 512, 768, 1024, 1536, 2 * 1024, 3 * 1024, 5 * 1024, 10 * 1024, 20 * 1024, ]: # For small values of toxiproxy_bytes_allowed, we need to allow for CREATE SOURCE to go undisturbed first, otherwise it fails immediately toxiproxy_setup = ( ["configure-materialize.td", "toxiproxy-close-connection.td"] if toxiproxy_bytes_allowed < 1024 else ["toxiproxy-close-connection.td", "configure-materialize.td"]) c.run( "testdrive", "--no-reset", "--max-errors=1", f"--seed={toxiproxy_bytes_allowed}", "--aws-endpoint=http://toxiproxy:4566", f"--var=toxiproxy-bytes-allowed={toxiproxy_bytes_allowed}", "configure-toxiproxy.td", "s3-create.td", "s3-insert-long.td", "s3-insert-long-gzip.td", # # Confirm that short network interruptions are tolerated # *toxiproxy_setup, "short-sleep.td", "toxiproxy-restore-connection.td", "materialize-verify-success.td", # # Confirm that long network interruptions cause source error # Disabled due to https://github.com/MaterializeInc/materialize/issues/7009 # "s3-insert-long.td s3-insert-long-gzip.td toxiproxy-close-connection.td materialize-verify-failure.td", # # Cleanup # "materialize-drop-source.td", "toxiproxy-remove.td", )
def workflow_postgres(c: Composition) -> None:
    c.start_and_wait_for_tcp(services=prerequisites)
    c.start_and_wait_for_tcp(services=["postgres"])
    c.wait_for_postgres(service="postgres")
    c.wait_for_materialized("materialized")
    c.run("testdrive", "postgres/debezium-postgres.td.initialize")
    c.run("testdrive", "postgres/*.td")
def workflow_default(c: Composition) -> None: c.start_and_wait_for_tcp( services=["zookeeper", "kafka", "schema-registry", "materialized"]) with tempfile.NamedTemporaryFile(mode="w", dir=c.path) as tmp: with contextlib.redirect_stdout(tmp): [cls.generate() for cls in Generator.__subclasses__()] sys.stdout.flush() c.run("testdrive-svc", os.path.basename(tmp.name))
def workflow_default(c: Composition) -> None: """Test cluster isolation by introducing faults of various kinds in cluster1 and then making sure that cluster2 continues to operate properly """ c.start_and_wait_for_tcp( services=["zookeeper", "kafka", "schema-registry"]) for id, disruption in enumerate(disruptions): run_test(c, disruption, id)
def workflow_cluster(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run all the limits tests against a multi-node, multi-replica cluster."""
    parser.add_argument(
        "--scenario", metavar="SCENARIO", type=str, help="Scenario to run."
    )
    parser.add_argument(
        "--workers",
        type=int,
        metavar="N",
        default=2,
        help="set the default number of workers",
    )
    args = parser.parse_args()

    c.start_and_wait_for_tcp(services=["zookeeper", "kafka", "schema-registry"])
    c.up("materialized")
    c.wait_for_materialized()

    nodes = [
        Computed(
            name="computed_1_1",
            workers=args.workers,
            peers=["computed_1_1", "computed_1_2"],
        ),
        Computed(
            name="computed_1_2",
            workers=args.workers,
            peers=["computed_1_1", "computed_1_2"],
        ),
        Computed(
            name="computed_2_1",
            workers=args.workers,
            peers=["computed_2_1", "computed_2_2"],
        ),
        Computed(
            name="computed_2_2",
            workers=args.workers,
            peers=["computed_2_1", "computed_2_2"],
        ),
    ]
    with c.override(*nodes):
        c.up(*[n.name for n in nodes])

        c.sql(
            """
            CREATE CLUSTER cluster1 REPLICAS (
                replica1 (REMOTE ['computed_1_1:2100', 'computed_1_2:2100']),
                replica2 (REMOTE ['computed_2_1:2100', 'computed_2_2:2100'])
            )
            """
        )

        run_test(c, args)
def workflow_cluster_testdrive(c: Composition) -> None:
    c.start_and_wait_for_tcp(services=["zookeeper", "kafka", "schema-registry"])

    # Skip tests that use features that are not supported yet.
    test_cluster(
        c,
        "grep",
        "-LE",
        "mz_catalog|mz_kafka_|mz_records_|mz_metrics",
        "testdrive/*.td",
    )
def run_one_scenario(
    c: Composition, scenario: Type[Scenario], args: argparse.Namespace
) -> Comparator:
    name = scenario.__name__
    print(f"--- Now benchmarking {name} ...")
    comparator = make_comparator(name)
    common_seed = round(time.time())

    mzs = {
        "this": Materialized(
            image=f"materialize/materialized:{args.this_tag}"
            if args.this_tag
            else None,
            options=args.this_options,
        ),
        "other": Materialized(
            image=f"materialize/materialized:{args.other_tag}"
            if args.other_tag
            else None,
            options=args.other_options,
        ),
    }

    for mz_id, instance in enumerate(["this", "other"]):
        with c.override(mzs[instance]):
            print(f"The version of the '{instance.upper()}' Mz instance is:")
            c.run("materialized", "--version")

            c.start_and_wait_for_tcp(services=["materialized"])
            c.wait_for_materialized()

            executor = Docker(composition=c, seed=common_seed)

            benchmark = Benchmark(
                mz_id=mz_id,
                scenario=scenario,
                scale=args.scale,
                executor=executor,
                filter=make_filter(args),
                termination_conditions=make_termination_conditions(args),
                aggregation=make_aggregation(),
            )

            outcome, iterations = benchmark.run()
            comparator.append(outcome)

            c.kill("materialized")
            c.rm("materialized", "testdrive-svc")
            c.rm_volumes("mzdata")

    return comparator
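# run_one_scenario appends exactly two outcomes to the comparator: first for
# "this", then for "other". A minimal sketch of the shape such a comparator
# could take; the real make_comparator is defined elsewhere, and this class is
# purely illustrative:


class MinimalComparator:
    def __init__(self, name: str) -> None:
        self.name = name
        self.outcomes: list[float] = []

    def append(self, outcome: float) -> None:
        self.outcomes.append(outcome)

    def is_regression(self, threshold: float = 1.10) -> bool:
        """True if 'this' (the first outcome) exceeds threshold times 'other'."""
        this, other = self.outcomes
        return this > threshold * other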
def workflow_redpanda_testdrive(c: Composition) -> None:
    c.start_and_wait_for_tcp(services=["redpanda", "materialized"])

    # Features currently not supported by Redpanda:
    # - `kafka-time-offset.td` (https://github.com/vectorizedio/redpanda/issues/2397)
    #
    # Due to interactions between docker-compose, entrypoint, command, and
    # bash, it is not possible to have a more complex filtering expression in
    # `command`. So we run the entire testdrive suite here, minus the tests
    # that contain features known to be unsupported by Redpanda. The run
    # therefore includes testdrive tests that do not touch Kafka at all.
    c.run("testdrive-svc", "grep", "-LE", "kafka_time_offset", "*.td")
def workflow_default(c: Composition) -> None: c.start_and_wait_for_tcp( ["zookeeper", "kafka", "schema-registry", "postgres"]) try: c.run("ci-cargo-test", "run-tests") finally: junit_report = ci_util.junit_report_filename("cargo-test") spawn.runv( ["cargo2junit"], stdin=(ROOT / "results.json").open("rb"), stdout=junit_report.open("wb"), ), ci_util.upload_junit_report("cargo-test", junit_report)
def workflow_nightly(c: Composition) -> None:
    """Run cluster testdrive."""
    c.start_and_wait_for_tcp(services=["zookeeper", "kafka", "schema-registry"])

    # Skip tests that use features that are not supported yet.
    files = spawn.capture(
        [
            "sh",
            "-c",
            "grep -rLE 'mz_catalog|mz_kafka_|mz_records_|mz_metrics' testdrive/*.td",
        ],
        cwd=Path(__file__).parent.parent,
    ).split()
    test_cluster(c, *files)
def workflow_failpoints(c: Composition) -> None:
    c.start_and_wait_for_tcp(services=prerequisites)

    for mz in mz_configurations:
        with c.override(mz):
            for failpoint in [
                "fileblob_set_sync",
                "fileblob_delete_before",
                "fileblob_delete_after",
                "insert_timestamp_bindings_before",
                "insert_timestamp_bindings_after",
            ]:
                for action in ["return", "panic", "sleep(1000)"]:
                    run_one_failpoint(c, failpoint, action)
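# run_one_failpoint is defined elsewhere in the composition. A hedged sketch
# of what such a helper could look like, reusing the FAILPOINTS environment
# pattern from the pg-snapshot workflow earlier in this section; the kwarg and
# file glob here are assumptions, not the original implementation:


def run_one_failpoint_sketch(c: Composition, failpoint: str, action: str) -> None:
    with c.override(
        Materialized(environment_extra=[f"FAILPOINTS={failpoint}={action}"])
    ):
        c.up("materialized")
        c.wait_for_materialized()
        c.run("testdrive-svc", "failpoints/*.td")
        c.kill("materialized")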
def workflow_disable_user_indexes(c: Composition) -> None: c.start_and_wait_for_tcp( services=["zookeeper", "kafka", "schema-registry"]) # Create catalog with vanilla MZ c.up("materialized") c.wait_for_materialized("materialized") c.run("testdrive-svc", "user-indexes-enabled.td") c.kill("materialized") # Test semantics of disabling user indexes c.up("mz_disable_user_indexes") c.wait_for_materialized("mz_disable_user_indexes") c.run("testdrive_no_reset", "user-indexes-disabled.td") c.kill("mz_disable_user_indexes")
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: parser.add_argument("--scenario", metavar="SCENARIO", type=str, help="Scenario to run.") args = parser.parse_args() c.start_and_wait_for_tcp( services=["zookeeper", "kafka", "schema-registry"]) c.up("materialized") c.wait_for_materialized() run_test(c, args)