Example #1
def workflow_test_github_13603(c: Composition) -> None:
    """Test that multi woker replicas terminate eagerly upon rehydration"""
    c.down(destroy_volumes=True)
    c.up("materialized")
    c.wait_for_materialized()

    c.up("computed_1")
    c.up("computed_2")
    c.sql(
        "CREATE CLUSTER cluster1 REPLICAS (replica1 (REMOTE ['computed_1:2100', 'computed_2:2100']));"
    )

    c.kill("materialized")
    c.up("materialized")
    c.wait_for_materialized()

    # Ensure the computeds have crashed
    c1 = c.invoke("logs", "computed_1", capture=True)
    assert "panicked" in c1.stdout
    c2 = c.invoke("logs", "computed_2", capture=True)
    assert "panicked" in c2.stdout
Example #2
def workflow_kafka_matrix(c: Composition) -> None:
    for version in CONFLUENT_PLATFORM_VERSIONS:
        print(f"==> Testing Confluent Platform {version}")
        confluent_platform_services = [
            Zookeeper(tag=version),
            Kafka(tag=version),
            SchemaRegistry(tag=version),
        ]
        with c.override(*confluent_platform_services):
            c.start_and_wait_for_tcp(services=[
                "zookeeper", "kafka", "schema-registry", "materialized"
            ])
            c.wait_for_materialized()
            c.run("testdrive-svc", "kafka-matrix.td")
            c.rm(
                "zookeeper",
                "kafka",
                "schema-registry",
                "materialized",
                destroy_volumes=True,
            )
Example #3
def workflow_user_tables(c: Composition) -> None:
    seed = round(time.time())

    c.up("materialized")
    c.wait_for_materialized()

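    # Note: `td_test` is not defined in this excerpt; it is assumed to come from
    # the surrounding module (e.g., the name of the testdrive scenario being run).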
    c.run(
        "testdrive-svc",
        f"--seed {seed} user-tables/table-persistence-before-{td_test}.td",
    )

    c.kill("materialized")
    c.up("materialized")

    c.run(
        "testdrive-svc",
        f"--seed {seed} user-tables/table-persistence-after-{td_test}.td",
    )

    c.kill("materialized")
    c.rm("materialized", "testdrive-svc", destroy_volumes=True)
    c.rm_volumes("mzdata")
Example #4
def workflow_disable_user_indexes(c: Composition) -> None:
    seed = round(time.time())

    c.start_and_wait_for_tcp(services=prerequisites)

    c.up("materialized")
    c.wait_for_materialized()

    c.run("testdrive-svc", f"--seed {seed} disable-user-indexes/before.td")

    c.kill("materialized")

    with c.override(mz_disable_user_indexes):
        c.up("materialized")
        c.wait_for_materialized()

        c.run("testdrive-svc", f"--seed {seed} disable-user-indexes/after.td")

        c.kill("materialized")

    c.rm("materialized", "testdrive-svc", destroy_volumes=True)

    c.rm_volumes("mzdata")
Example #5
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    parser.add_argument(
        "filter",
        nargs="*",
        default=["*.td"],
        help="limit to only the files matching filter",
    )
    args = parser.parse_args()

    ssl_ca = c.run("test-certs", "cat", "/secrets/ca.crt", capture=True).stdout
    ssl_cert = c.run("test-certs",
                     "cat",
                     "/secrets/certuser.crt",
                     capture=True).stdout
    ssl_key = c.run("test-certs", "cat", "/secrets/certuser.key",
                    capture=True).stdout
    ssl_wrong_cert = c.run("test-certs",
                           "cat",
                           "/secrets/postgres.crt",
                           capture=True).stdout
    ssl_wrong_key = c.run("test-certs",
                          "cat",
                          "/secrets/postgres.key",
                          capture=True).stdout

    c.up("materialized", "test-certs", "testdrive", "postgres")
    c.wait_for_materialized()
    c.wait_for_postgres()
    c.run(
        "testdrive",
        f"--var=ssl-ca={ssl_ca}",
        f"--var=ssl-cert={ssl_cert}",
        f"--var=ssl-key={ssl_key}",
        f"--var=ssl-wrong-cert={ssl_wrong_cert}",
        f"--var=ssl-wrong-key={ssl_wrong_key}",
        *args.filter,
    )
Example #6
def workflow_default(c: Composition) -> None:
    materialized = Materialized(
        options=["--catalog-postgres-stash", "postgres://*****:*****@postgres"],
    )
    postgres = Postgres(image="postgres:13.6")
    testdrive = Testdrive()

    with c.override(materialized, testdrive, postgres):
        c.up("postgres")
        c.wait_for_postgres()
        c.start_and_wait_for_tcp(services=["materialized"])
        c.wait_for_materialized("materialized")

        c.sql("CREATE TABLE a (i INT)")

        c.stop("postgres")
        c.up("postgres")
        c.wait_for_postgres()

        c.sql("CREATE TABLE b (i INT)")

        c.rm("postgres", stop=True, destroy_volumes=True)
        c.up("postgres")
        c.wait_for_postgres()

        # Postgres cleared its database, so this should fail.
        try:
            c.sql("CREATE TABLE c (i INT)")
            raise Exception("expected unreachable")
        except Exception as e:
            # Depending on timing, either of these errors can occur. The stash error
            # comes from the stash complaining; the network error comes from pg8000
            # complaining because materialized panicked.
            if "stash error: postgres: db error" not in str(
                    e) and "network error" not in str(e):
                raise e
Example #7
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run CH-benCHmark without any load on Materialize"""

    # Parse arguments.
    parser.add_argument("--wait",
                        action="store_true",
                        help="wait for the load generator to exit")
    args, unknown_args = parser.parse_known_args()

    # Start Materialize.
    c.up("materialized")
    c.wait_for_materialized()

    # Start MySQL and Debezium.
    c.up("mysql", "debezium")
    c.wait_for_tcp(host="mysql", port=3306)
    c.wait_for_tcp(host="debezium", port=8083)

    # Generate initial data.
    c.run(
        "chbench",
        "gen",
        "--config-file-path=/etc/chbenchmark/mz-default-mysql.cfg",
        "--warehouses=1",
    )

    # Start Debezium.
    response = requests.post(
        f"http://localhost:{c.default_port('debezium')}/connectors",
        json={
            "name": "mysql-connector",
            "config": {
                "connector.class":
                "io.debezium.connector.mysql.MySqlConnector",
                "database.hostname": "mysql",
                "database.port": "3306",
                "database.user": "******",
                "database.password": "******",
                "database.server.name": "debezium",
                "database.server.id": "1234",
                "database.history.kafka.bootstrap.servers": "kafka:9092",
                "database.history.kafka.topic": "mysql-history",
                "database.allowPublicKeyRetrieval": "true",
                "time.precision.mode": "connect",
            },
        },
    )
    # Don't error if the connector already exists.
    if response.status_code != requests.codes.conflict:
        response.raise_for_status()

    # Run load generator.
    c.run(
        "chbench",
        "run",
        "--config-file-path=/etc/chbenchmark/mz-default-mysql.cfg",
        "--dsn=mysql",
        "--gen-dir=/var/lib/mysql-files",
        "--analytic-threads=0",
        "--transactional-threads=1",
        "--run-seconds=86400",
        "--mz-sources",
        *unknown_args,
        detach=not args.wait,
    )
Example #8
def workflow_default(c: Composition) -> None:
    c.up("materialized")
    c.wait_for_materialized()
    c.run("csharp", "/workdir/test/lang/csharp/test.sh")
Example #9
def test_cluster(c: Composition, *glob: str) -> None:
    c.up("dataflowd_1", "dataflowd_2")
    c.up("materialized")
    c.wait_for_materialized()
    c.run("testdrive-svc", *glob)
Example #10
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    parser.add_argument(
        "--num-seconds",
        type=int,
        default=100,
        help="number of seconds to write records to Kafka",
    )
    parser.add_argument(
        "--records-per-second",
        type=int,
        default=10000,
        help="throughput of writes to maintain during testing",
    )
    parser.add_argument("--num-keys",
                        type=int,
                        default=1000000000,
                        help="number of distinct keys")
    parser.add_argument("--value-bytes",
                        type=int,
                        default=500,
                        help="record payload size in bytes")
    parser.add_argument("--timeout-secs",
                        type=int,
                        default=120,
                        help="timeout to send records to Kafka")
    parser.add_argument(
        "--blob-url",
        type=str,
        default=None,
        help="location where we store persistent data",
    )
    parser.add_argument(
        "--consensus-url",
        type=str,
        default=None,
        help="location where we store persistent data",
    )
    args = parser.parse_args()

    options = [
        "--persist-consensus-url",
        f"{args.consensus_url}",
        "--persist-blob-url",
        f"{args.blob_url}",
    ]

    override = [Materialized(options=options)]

    with c.override(*override):
        c.start_and_wait_for_tcp(services=prerequisites)

        c.up("materialized")
        c.wait_for_materialized("materialized")

        c.run(
            "testdrive",
            "setup.td",
        )

        start = time.monotonic()
        records_sent = 0
        total_records_to_send = args.records_per_second * args.num_seconds
        # Maximum observed delta between records sent by the benchmark and ingested by
        # Materialize.
        max_lag = 0
        last_reported_time = 0.0

        while True:
            elapsed = time.monotonic() - start
            records_ingested = query_materialize(c)

            lag = records_sent - records_ingested

            if lag > max_lag:
                max_lag = lag

            # Report our findings back once per second.
            if elapsed - last_reported_time > 1:
                print(
                    f"C> after {elapsed:.3f}s sent {records_sent} records, and ingested {records_ingested}. max observed lag {max_lag} records, most recent lag {lag} records"
                )
                last_reported_time = elapsed

            # Determine how many records we are scheduled to send, based on how long
            # the benchmark has been running and the desired QPS.
            records_scheduled = int(
                min(elapsed, args.num_seconds) * args.records_per_second)
            records_to_send = records_scheduled - records_sent

            if records_to_send > 0:
                send_records(
                    c,
                    num_records=records_to_send,
                    num_keys=args.num_keys,
                    value_bytes=args.value_bytes,
                    timeout_secs=args.timeout_secs,
                )
                records_sent = records_scheduled

            # Exit once we've sent all the records we need to send, and confirmed that
            # Materialize has ingested them.
            if records_sent == total_records_to_send == records_ingested:
                print(
                    f"C> Finished after {elapsed:.3f}s sent and ingested {records_sent} records. max observed lag {max_lag} records."
                )
                break
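The benchmark loop above relies on two helpers, query_materialize and send_records, that are defined elsewhere in the same mzcompose file. Below is a minimal sketch of query_materialize only, assuming a Composition.sql_query helper and an illustrative ingest_count view; both the import path and the view name are assumptions for illustration, not the repository's actual code.

from materialize.mzcompose import Composition  # import path assumed from the surrounding examples


def query_materialize(c: Composition) -> int:
    """Return how many records Materialize has ingested so far."""
    # sql_query is assumed to run a query against the "materialized" service
    # and return the result rows; "ingest_count" is an illustrative view name.
    rows = c.sql_query("SELECT count(*) FROM ingest_count")
    return int(rows[0][0])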
Example #11
 def run(self, c: Composition) -> None:
     c.up("materialized")
     c.wait_for_materialized()
Example #12
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    parser.add_argument(
        "--num-seconds",
        type=int,
        default=100,
        help="number of seconds to write records to Kafka",
    )
    parser.add_argument(
        "--records-per-second",
        type=int,
        default=10000,
        help="throughput of writes to maintain during testing",
    )
    parser.add_argument("--num-keys",
                        type=int,
                        default=1000000000,
                        help="number of distinct keys")
    parser.add_argument("--value-bytes",
                        type=int,
                        default=500,
                        help="record payload size in bytes")
    parser.add_argument(
        "--upsert",
        action="store_true",
        help="whether to use envelope UPSERT (True) or NONE (False)",
    )
    parser.add_argument("--timeout-secs",
                        type=int,
                        default=120,
                        help="timeout to send records to Kafka")
    parser.add_argument(
        "--enable-persistence",
        action="store_true",
        help="whether or not to enable persistence on materialized",
    )
    parser.add_argument(
        "--s3-storage",
        type=str,
        default=None,
        help="enables s3 persist storage, pointed at the given subpath of our internal testing bucket",
    )
    parser.add_argument(
        "--workers",
        type=int,
        default=None,
        help="number of dataflow workers to use in materialized",
    )
    args = parser.parse_args()

    envelope = "NONE"
    if args.upsert:
        envelope = "UPSERT"

    options = []
    if args.enable_persistence:
        options = [
            "--persistent-user-tables",
            "--persistent-kafka-sources",
            "--disable-persistent-system-tables-test",
        ]

    if args.s3_storage == "":
        print("--s3-storage value must be non-empty", file=sys.stderr)
        sys.exit(1)
    elif args.s3_storage:
        if args.enable_persistence is not True:
            print(
                "cannot specifiy --s3-storage without --enable-persistence",
                file=sys.stderr,
            )
            sys.exit(1)
        options.extend([
            "--persist-storage-enabled",
            f"--persist-storage=s3://mtlz-test-persist-1d-lifecycle-delete/{args.s3_storage}",
        ])

    override = [
        Materialized(
            workers=args.workers,
            timestamp_frequency="1s",
            options=options,
        )
    ]

    with c.override(*override):
        c.start_and_wait_for_tcp(services=prerequisites)

        c.up("materialized")
        c.wait_for_materialized("materialized")

        c.run(
            "testdrive-svc",
            f"--var=envelope={envelope}",
            "setup.td",
        )

        start = time.monotonic()
        records_sent = 0
        total_records_to_send = args.records_per_second * args.num_seconds
        # Maximum observed delta between records sent by the benchmark and ingested by
        # Materialize.
        max_lag = 0
        last_reported_time = 0.0

        while True:
            elapsed = time.monotonic() - start
            records_ingested = query_materialize(c)

            lag = records_sent - records_ingested

            if lag > max_lag:
                max_lag = lag

            # Report our findings back once per second.
            if elapsed - last_reported_time > 1:
                print(
                    f"C> after {elapsed:.3f}s sent {records_sent} records, and ingested {records_ingested}. max observed lag {max_lag} records, most recent lag {lag} records"
                )
                last_reported_time = elapsed

            # Determine how many records we are scheduled to send, based on how long
            # the benchmark has been running and the desired QPS.
            records_scheduled = int(
                min(elapsed, args.num_seconds) * args.records_per_second)
            records_to_send = records_scheduled - records_sent

            if records_to_send > 0:
                send_records(
                    c,
                    num_records=records_to_send,
                    num_keys=args.num_keys,
                    value_bytes=args.value_bytes,
                    timeout_secs=args.timeout_secs,
                )
                records_sent = records_scheduled

            # Exit once we've sent all the records we need to send, and confirmed that
            # Materialize has ingested them.
            if records_sent == total_records_to_send == records_ingested:
                print(
                    f"C> Finished after {elapsed:.3f}s sent and ingested {records_sent} records. max observed lag {max_lag} records."
                )
                break
Example #13
def workflow_default(c: Composition) -> None:
    c.start_and_wait_for_tcp(services=prerequisites)

    c.wait_for_materialized("materialized")

    c.run("testdrive", "test.td")
Example #14
def workflow_default(c: Composition) -> None:
    c.start_and_wait_for_tcp(services=["materialized"])
    c.wait_for_materialized("materialized")

    # Ensure that the directory has restricted permissions
    c.exec(
        "materialized",
        "bash",
        "-c",
        "[[ `stat -c \"%a\" /mzdata/secrets` == '700' ]] && exit 0 || exit 1",
    )

    c.sql("CREATE SECRET secret AS 's3cret'")
    # Check that the contents of the secret have made it to the storage
    c.exec(
        "materialized",
        "bash",
        "-c",
        "[[ `cat /mzdata/secrets/*` == 's3cret' ]] && exit 0 || exit 1",
    )

    # Check that the file permissions are restrictive
    c.exec(
        "materialized",
        "bash",
        "-c",
        "[[ `stat -c \"%a\" /mzdata/secrets/*` == '600' ]] && exit 0 || exit 1",
    )

    # Check that ALTER SECRET is reflected on disk
    c.sql("ALTER SECRET secret AS 'tops3cret'")
    c.exec(
        "materialized",
        "bash",
        "-c",
        "[[ `cat /mzdata/secrets/*` == 'tops3cret' ]] && exit 0 || exit 1",
    )

    # Check that replacing the file did not change permissions
    c.exec(
        "materialized",
        "bash",
        "-c",
        "[[ `stat -c \"%a\" /mzdata/secrets/*` == '600' ]] && exit 0 || exit 1",
    )

    # Rename should not change the contents on disk
    c.sql("ALTER SECRET secret RENAME TO renamed_secret")

    # Check that the contents of the secret have made it to the storage
    c.exec(
        "materialized",
        "bash",
        "-c",
        "[[ `cat /mzdata/secrets/*` == 'tops3cret' ]] && exit 0 || exit 1",
    )

    c.sql("DROP SECRET renamed_secret")
    # Check that the file has been deleted from the storage
    c.exec(
        "materialized",
        "bash",
        "-c",
        "[[ -z `ls -A /mzdata/secrets` ]] && exit 0 || exit 1",
    )
Example #15
def workflow_instance_size(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Create multiple clusters with multiple nodes and replicas each"""
    c.start_and_wait_for_tcp(
        services=["zookeeper", "kafka", "schema-registry"])

    parser.add_argument(
        "--workers",
        type=int,
        metavar="N",
        default=2,
        help="set the default number of workers",
    )

    parser.add_argument(
        "--clusters",
        type=int,
        metavar="N",
        default=16,
        help="set the number of clusters to create",
    )
    parser.add_argument(
        "--nodes",
        type=int,
        metavar="N",
        default=4,
        help="set the number of nodes per cluster",
    )
    parser.add_argument(
        "--replicas",
        type=int,
        metavar="N",
        default=4,
        help="set the number of replicas per cluster",
    )
    args = parser.parse_args()

    c.up("testdrive", persistent=True)
    c.up("materialized")
    c.wait_for_materialized()

    # Construct the required Computed instances and peer them into clusters
    computeds = []
    for cluster_id in range(0, args.clusters):
        for replica_id in range(0, args.replicas):
            nodes = []
            for node_id in range(0, args.nodes):
                node_name = f"computed_{cluster_id}_{replica_id}_{node_id}"
                nodes.append(node_name)

            for node_id in range(0, args.nodes):
                computeds.append(
                    Computed(name=nodes[node_id],
                             peers=nodes,
                             workers=args.workers))

    with c.override(*computeds):
        with c.override(Testdrive(seed=1, no_reset=True)):

            for n in computeds:
                c.up(n.name)

            # Create some input data
            c.testdrive(
                dedent("""
                    > CREATE TABLE ten (f1 INTEGER);
                    > INSERT INTO ten VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);

                    $ set schema={
                        "type" : "record",
                        "name" : "test",
                        "fields" : [
                            {"name":"f1", "type":"string"}
                        ]
                      }

                    $ kafka-create-topic topic=instance-size

                    $ kafka-ingest format=avro topic=instance-size schema=${schema} publish=true repeat=10000
                    {"f1": "fish"}
                    """))

            # Construct the required CREATE CLUSTER statements
            for cluster_id in range(0, args.clusters):
                replica_definitions = []
                for replica_id in range(0, args.replicas):
                    nodes = []
                    for node_id in range(0, args.nodes):
                        node_name = f"computed_{cluster_id}_{replica_id}_{node_id}"
                        nodes.append(node_name)

                    replica_name = f"replica_{cluster_id}_{replica_id}"

                    replica_definitions.append(
                        f"{replica_name} (REMOTE ["
                        + ", ".join(f"'{n}:2100'" for n in nodes)
                        + "])"
                    )

                c.sql(f"CREATE CLUSTER cluster_{cluster_id} REPLICAS (" +
                      ",".join(replica_definitions) + ")")

            # Construct some dataflows in each cluster
            for cluster_id in range(0, args.clusters):
                cluster_name = f"cluster_{cluster_id}"

                c.testdrive(
                    dedent(f"""
                         > SET cluster={cluster_name}

                         > CREATE DEFAULT INDEX ON ten;

                         > CREATE MATERIALIZED VIEW v_{cluster_name} AS
                           SELECT COUNT(*) AS c1 FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4;

                         > CREATE MATERIALIZED SOURCE s_{cluster_name}
                           FROM KAFKA BROKER '${{testdrive.kafka-addr}}' TOPIC
                           'testdrive-instance-size-${{testdrive.seed}}'
                           FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY '${{testdrive.schema-registry-url}}'
                           ENVELOPE NONE
                     """))

            # Validate that each individual cluster is operating properly
            for cluster_id in range(0, args.clusters):
                cluster_name = f"cluster_{cluster_id}"

                c.testdrive(
                    dedent(f"""
                         > SET cluster={cluster_name}

                         > SELECT c1 FROM v_{cluster_name};
                         10000

                         > SELECT COUNT(*) FROM s_{cluster_name}
                         10000
                     """))
Example #16
def restart_mz(c: Composition) -> None:
    c.kill("materialized")
    c.up("materialized")
    c.wait_for_materialized()
Example #17
def workflow_default(c: Composition) -> None:
    c.start_and_wait_for_tcp(services=["materialized"])
    c.wait_for_materialized("materialized")

    c.run("testdrive", "ssh-connections.td")
Example #18
def workflow_default(c: Composition) -> None:
    c.up("materialized", "metabase")
    c.wait_for_materialized()
    c.run("smoketest")
Example #19
def workflow_default(c: Composition) -> None:
    """Streams data from Wikipedia to a browser visualzation."""
    c.up("server")
    c.wait_for_materialized()
    c.sql((Path(__file__).parent / "views.sql").read_text())
Example #20
 def execute(self, c: Composition) -> None:
     c.kill("materialized")
     c.up("materialized")
     c.wait_for_materialized()
Example #21
def start_everything(c: Composition) -> None:
    c.up("kafka", "materialized")
    c.wait_for_tcp(host="kafka", port=9092)
    c.wait_for_materialized()
Example #22
def workflow_two_mz(c: Composition) -> None:
    for mz in multiple_mz:
        c.up(mz.name)
        c.wait_for_materialized(mz.name)
Example #23
def run(c: Composition, args: List[str], detach: bool) -> None:
    c.up("materialized")
    c.wait_for_materialized()
    c.run("perf-kinesis", *args, detach=detach)
Example #24
def run_test(c: Composition, disruption: Disruption, id: int) -> None:
    print(f"+++ Running disruption scenario {disruption.name}")

    c.up("testdrive", persistent=True)
    c.up("materialized")
    c.wait_for_materialized()

    nodes = [
        Computed(
            name="computed_1_1",
            peers=["computed_1_1", "computed_1_2"],
        ),
        Computed(
            name="computed_1_2",
            peers=["computed_1_1", "computed_1_2"],
        ),
        Computed(
            name="computed_2_1",
            peers=["computed_2_1", "computed_2_2"],
        ),
        Computed(
            name="computed_2_2",
            peers=["computed_2_1", "computed_2_2"],
        ),
    ]

    with c.override(*nodes):
        c.up(*[n.name for n in nodes])

        c.sql("""
            DROP CLUSTER IF EXISTS cluster1 CASCADE;
            CREATE CLUSTER cluster1 REPLICAS (replica1 (REMOTE ['computed_1_1:2100', 'computed_1_2:2100']));
            """)

        c.sql("""
            DROP CLUSTER IF EXISTS cluster2 CASCADE;
            CREATE CLUSTER cluster2 REPLICAS (replica1 (REMOTE ['computed_2_1:2100', 'computed_2_2:2100']));
            """)

        with c.override(
                Testdrive(
                    validate_data_dir=False,
                    no_reset=True,
                    materialize_params={"cluster": "cluster2"},
                    seed=id,
                )):
            populate(c)

            # Disrupt cluster1 by some means
            disruption.disruption(c)

            validate(c)

        cleanup_list = [
            "materialized",
            "testdrive",
            *[n.name for n in nodes],
        ]
        c.kill(*cleanup_list)
        c.rm(*cleanup_list, destroy_volumes=True)

    c.rm_volumes("mzdata", "pgdata")
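The populate and validate calls above refer to helpers defined elsewhere in this test's module. Below is a minimal sketch of their assumed shape, driving input and verification through Composition.testdrive as in Example #15; the table, values, and expected count are illustrative, not the actual workload.

from textwrap import dedent

from materialize.mzcompose import Composition  # import path assumed


def populate(c: Composition) -> None:
    # Feed some input data through testdrive.
    c.testdrive(
        dedent(
            """
            > CREATE TABLE t1 (f1 INTEGER);
            > INSERT INTO t1 VALUES (1), (2), (3);
            """
        )
    )


def validate(c: Composition) -> None:
    # Confirm the data is still queryable after the disruption.
    c.testdrive(
        dedent(
            """
            > SELECT COUNT(*) FROM t1;
            3
            """
        )
    )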
Example #25
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run testdrive."""
    parser.add_argument(
        "--redpanda",
        action="store_true",
        help="run against Redpanda instead of the Confluent Platform",
    )
    parser.add_argument(
        "--aws-region",
        help="run against the specified AWS region instead of localstack",
    )
    parser.add_argument(
        "--workers",
        type=int,
        metavar="N",
        help="set the number of materialized dataflow workers",
    )
    parser.add_argument(
        "--kafka-default-partitions",
        type=int,
        metavar="N",
        help="set the default number of kafka partitions per topic",
    )
    parser.add_argument(
        "--persistent-user-tables",
        action="store_true",
        help="enable the --persistent-user-tables materialized option",
    )
    parser.add_argument(
        "files",
        nargs="*",
        default=["*.td", "esoteric/*.td"],
        help="run against the specified files",
    )
    args = parser.parse_args()

    if not args.redpanda and Arch.host() == Arch.AARCH64:
        ui.warn(
            "Running the Confluent Platform in Docker on ARM-based machines is "
            "nearly unusably slow. Consider using Redpanda instead (--redpanda) "
            "or running tests without mzcompose."
        )

    dependencies = ["materialized"]
    if args.redpanda:
        dependencies += ["redpanda"]
    else:
        dependencies += ["zookeeper", "kafka", "schema-registry"]

    materialized = Materialized(
        workers=args.workers,
        options=["--persistent-user-tables"] if args.persistent_user_tables else [],
    )

    testdrive = Testdrive(
        forward_buildkite_shard=True,
        kafka_default_partitions=args.kafka_default_partitions,
        entrypoint_extra=[f"--aws-region={args.aws_region}"]
        if args.aws_region
        else ["--aws-endpoint=http://localstack:4566"],
    )

    with c.override(materialized, testdrive):
        c.start_and_wait_for_tcp(services=dependencies)
        c.wait_for_materialized("materialized")
        try:
            junit_report = ci_util.junit_report_filename(c.name)
            c.run("testdrive-svc", f"--junit-report={junit_report}", *args.files)
        finally:
            ci_util.upload_junit_report(
                "testdrive", Path(__file__).parent / junit_report
            )
Example #26
def workflow_default(c: Composition) -> None:
    "Test that materialize can use a multitude of auth schemes to connect to AWS"
    LOCAL_DIR.mkdir()

    session = boto3.Session()
    sts: STSClient = session.client("sts")
    iam: IAMClient = session.client("iam")

    identity = sts.get_caller_identity()
    current_user = identity["Arn"]

    aws_region = session.region_name

    created_roles: List[CreatedRole] = []
    try:
        allowed = create_role(iam, "Allow", current_user, created_roles)
        denied = create_role(iam, "Deny", current_user, created_roles)
        requires_eid = create_role(
            iam, "Allow", current_user, created_roles, external_id=EXTERNAL_ID
        )
        profile_contents = gen_profile_text(
            session, allowed.arn, requires_eid.arn, denied.arn
        )

        wait_for_role(sts, allowed.arn)

        td_args = [
            f"--aws-region={aws_region}",
            f"--var=allowed-role-arn={allowed.arn}",
            f"--var=denied-role-arn={denied.arn}",
            f"--var=role-requires-eid={requires_eid.arn}",
        ]

        # == Run core tests ==

        c.up("materialized")

        write_aws_config(LOCAL_DIR, profile_contents)

        c.wait_for_materialized("materialized")
        c.run(
            "testdrive",
            *td_args,
            "test.td",
        )
        c.run(
            "testdrive",
            *td_args,
            # no reset because the next test wants to validate behavior with
            # the previous catalog
            "--no-reset",
            "test-externalid-missing.td",
        )

        # == Tests that restarting materialized without a profile doesn't bork mz ==

        print("+++ Test Restarts with and without profile files")

        # Historically, a missing AWS config file would cause all SQL commands
        # to hang entirely after a restart. This no longer happens, but this
        # step restarts materialized to catch the regression if it comes back.
        c.stop("materialized")

        rm_aws_config(LOCAL_DIR)

        c.up("materialized")

        c.run(
            "testdrive",
            "--no-reset",
            "test-restart-no-creds.td",
        )

        # now test that with added credentials things can be done
        write_aws_config(LOCAL_DIR, profile_contents)
        c.run("testdrive", *td_args, "test-restart-with-creds.td")

        # == Test that requires --aws-external-id has been supplied ==
        print("+++ Test AWS External IDs")
        c.stop("materialized")
        c.rm("materialized")

        with c.override(MZ_EID):
            c.up("materialized")
            c.wait_for_materialized("materialized")
            write_aws_config(LOCAL_DIR, profile_contents)
            c.run("testdrive", *td_args, "test-externalid-present.td")
    finally:
        errored = False
        for role in created_roles:
            try:
                iam.delete_role_policy(RoleName=role.name, PolicyName=role.policy_name)
            except Exception as e:
                errored = True
                print(
                    f"> Unable to delete role policy {role.name}/{role.policy_name}: {e}"
                )

            try:
                iam.delete_role(RoleName=role.name)
                print(f"> Deleted IAM role {role.name}")
            except Exception as e:
                errored = True
                print(f"> Unable to delete role {role.name}: {e}")

        rm_aws_config(LOCAL_DIR)
        LOCAL_DIR.rmdir()

        if errored:
            raise UIError("Unable to completely clean up AWS resources")
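write_aws_config and rm_aws_config above are helpers defined elsewhere in this workflow's module; the workflow only implies that they write the generated profile text under LOCAL_DIR and remove it again. A minimal sketch under that assumption (the "config" file name is made up):

from pathlib import Path


def write_aws_config(local_dir: Path, contents: str) -> None:
    # Write the generated AWS profile text where materialized will look for it.
    (local_dir / "config").write_text(contents)


def rm_aws_config(local_dir: Path) -> None:
    # Remove the profile again; missing_ok avoids an error if it was never written.
    (local_dir / "config").unlink(missing_ok=True)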
Example #27
def workflow_default(c: Composition) -> None:
    c.up("materialized")
    c.wait_for_materialized()
    c.run("java-smoketest", "mvn", "test")