Example #1
def workflow_start_two_mzs(c: Composition,
                           parser: WorkflowArgumentParser) -> None:
    """Starts two Mz instances from different git tags for the purpose of manually running
    RQG comparison tests.
    """
    parser.add_argument("--this-tag",
                        help="Run Materialize with this git tag on port 6875")

    parser.add_argument("--other-tag",
                        help="Run Materialize with this git tag on port 16875")
    args = parser.parse_args()

    with c.override(
            Materialized(
                name="mz_this",
                image=f"materialize/materialized:{args.this_tag}"
                if args.this_tag else None,
                volumes=[],  # Keep the mzdata, pgdata, etc. private to the container
                allow_host_ports=True,
                ports=["6875:6875"],
            ),
            Materialized(
                name="mz_other",
                image=f"materialize/materialized:{args.other_tag}"
                if args.other_tag else None,
                volumes=[],
                allow_host_ports=True,
                ports=["16875:6875"],
            ),
    ):
        for mz in ["mz_this", "mz_other"]:
            c.up(mz)
            c.wait_for_materialized(service=mz)
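
# Hypothetical follow-up (not part of the original source): with both
# instances up, an RQG comparison run can connect to them through the host
# ports published above; the client library and parameters below are
# illustrative assumptions.
#
#   import pg8000
#   mz_this = pg8000.connect(host="localhost", port=6875, user="materialize")
#   mz_other = pg8000.connect(host="localhost", port=16875, user="materialize")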
Example #2
def start_services(c: Composition, args: argparse.Namespace,
                   instance: str) -> List[Service]:
    tag, options, nodes, workers = ((args.this_tag, args.this_options,
                                     args.this_nodes, args.this_workers)
                                    if instance == "this" else
                                    (args.other_tag, args.other_options,
                                     args.other_nodes, args.other_workers))

    cluster_services: List[Service] = []

    if nodes:
        cluster_services.append(
            Materialized(
                image=f"materialize/materialized:{tag}" if tag else None, ))

        node_names = [f"computed_{n}" for n in range(0, nodes)]
        for node_id in range(0, nodes):
            cluster_services.append(
                Computed(
                    name=node_names[node_id],
                    workers=workers,
                    options=options,
                    peers=node_names,
                    image=f"materialize/computed:{tag}" if tag else None,
                ))
    else:
        cluster_services.append(
            Materialized(
                image=f"materialize/materialized:{tag}" if tag else None,
                workers=workers,
                options=options,
            ))

    with c.override(*cluster_services):
        print(f"The version of the '{instance.upper()}' Mz instance is:")
        c.run("materialized", "--version")

        # Single-binary legacy Mz instances only have port 6875 open
        # so only check that port before proceeding
        c.up("materialized")
        c.wait_for_materialized(port=6875)

        if nodes:
            print(f"Starting cluster for '{instance.upper()}' ...")
            c.up(*[f"computed_{n}" for n in range(0, nodes)])

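            # With e.g. nodes=2, the statement below expands to:
            #   CREATE CLUSTER REPLICA default.feature_benchmark
            #       REMOTE ['computed_0:2100','computed_1:2100'];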
            c.sql("CREATE CLUSTER REPLICA default.feature_benchmark REMOTE [" +
                  ",".join([f"'computed_{n}:2100'"
                            for n in range(0, nodes)]) + "];")

            c.sql("DROP CLUSTER REPLICA default.default_replica")

    c.up("testdrive", persistent=True)

    return cluster_services
Example #3
def run_one_scenario(
    c: Composition, scenario: Type[Scenario], args: argparse.Namespace
) -> Comparator:
    name = scenario.__name__
    print(f"--- Now benchmarking {name} ...")
    comparator = make_comparator(name)
    common_seed = round(time.time())

    mzs = {
        "this": Materialized(
            image=f"materialize/materialized:{args.this_tag}"
            if args.this_tag
            else None,
            options=args.this_options,
        ),
        "other": Materialized(
            image=f"materialize/materialized:{args.other_tag}"
            if args.other_tag
            else None,
            options=args.other_options,
        ),
    }

    for mz_id, instance in enumerate(["this", "other"]):
        with c.override(mzs[instance]):
            print(f"The version of the '{instance.upper()}' Mz instance is:")
            c.run("materialized", "--version")

            c.start_and_wait_for_tcp(services=["materialized"])
            c.wait_for_materialized()

            executor = Docker(
                composition=c,
                seed=common_seed,
            )

            benchmark = Benchmark(
                mz_id=mz_id,
                scenario=scenario,
                scale=args.scale,
                executor=executor,
                filter=make_filter(args),
                termination_conditions=make_termination_conditions(args),
                aggregation=make_aggregation(),
            )

            outcome, iterations = benchmark.run()
            comparator.append(outcome)

            c.kill("materialized")
            c.rm("materialized", "testdrive-svc")
            c.rm_volumes("mzdata")

    return comparator
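
# Hypothetical driver sketch (assumptions, for illustration): a caller might
# run every scenario and fail the build on a regression. The `scenarios` list
# and the comparator's `is_regression()` method are assumptions here.
#
#   comparators = [run_one_scenario(c, s, args) for s in scenarios]
#   if any(comp.is_regression() for comp in comparators):
#       sys.exit(1)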
Example #4
def workflow_test_builtin_migration(c: Composition) -> None:
    """Exercise the builtin object migration code by upgrading between two versions
    that will have a migration triggered between them. Create a materialized view
    over the affected builtin object to confirm that the migration was successful
    """

    c.down(destroy_volumes=True)
    with c.override(
            # Random commit before pg_roles was updated.
            Materialized(
                image="materialize/materialized:devel-9efd269199b1510b3e8f90196cb4fa3072a548a1",
            ),
            Testdrive(default_timeout="15s",
                      no_reset=True,
                      consistent_seed=True),
    ):
        c.up("testdrive", persistent=True)
        c.up("materialized")
        c.wait_for_materialized()

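        # In testdrive input, a `>` line runs a statement and verifies any
        # expected output on the following lines, while a `!` line expects the
        # statement to fail with the given error.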
        c.testdrive(input=dedent("""
        > CREATE VIEW v1 AS SELECT COUNT(*) FROM pg_roles;
        > SELECT * FROM v1;
        2
        ! SELECT DISTINCT rolconnlimit FROM pg_roles;
        contains:column "rolconnlimit" does not exist
    """))

        c.kill("materialized")

    with c.override(
            # This will stop working if we introduce a breaking change.
            Materialized(),
            Testdrive(default_timeout="15s",
                      no_reset=True,
                      consistent_seed=True),
    ):
        c.up("testdrive", persistent=True)
        c.up("materialized")
        c.wait_for_materialized()

        c.testdrive(input=dedent("""
       > SELECT * FROM v1;
       2
       # This column is new after the migration
       > SELECT DISTINCT rolconnlimit FROM pg_roles;
       -1
    """))
Example #5
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run the proxy tests."""
    parser.add_argument(
        "--redpanda",
        action="store_true",
        help="run against Redpanda instead of the Confluent Platform",
    )
    parser.add_argument(
        "--aws-region",
        help="run against the specified AWS region instead of localstack",
    )
    args = parser.parse_args()

    dependencies = ["squid"]
    if args.redpanda:
        dependencies += ["redpanda"]
    else:
        dependencies += ["zookeeper", "kafka", "schema-registry"]
    if not args.aws_region:
        dependencies += ["localstack"]
    c.start_and_wait_for_tcp(dependencies)

    aws_arg = (f"--aws-region={args.aws_region}"
               if args.aws_region else "--aws-endpoint=http://localstack:4566")

    for test_case in test_cases:
        print(f"Running test case {test_case.name!r}")
        with c.override(Materialized(environment_extra=test_case.env)):
            c.up("materialized")
            c.wait_for_materialized("materialized")
            c.run("testdrive-svc", aws_arg, *test_case.files)
Example #6
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Runs the dbt adapter test suite against Materialize in various configurations."""
    parser.add_argument(
        "filter", nargs="?", default="", help="limit to test cases matching filter"
    )
    args = parser.parse_args()

    for test_case in test_cases:
        if args.filter in test_case.name:
            print(f"> Running test case {test_case.name}")
            materialized = Materialized(
                options=test_case.materialized_options,
                image=test_case.materialized_image,
                depends_on=["test-certs"],
                volumes=["secrets:/secrets"],
            )

            with c.override(materialized):
                c.up("materialized")
                c.wait_for_tcp(host="materialized", port=6875)
                c.run(
                    "dbt-test",
                    "pytest",
                    "dbt-materialize/test",
                    env_extra=test_case.dbt_env,
                )
Example #7
def workflow_disable_user_indexes(c: Composition) -> None:
    seed = round(time.time())

    c.start_and_wait_for_tcp(services=prerequisites)

    c.up("materialized")
    c.wait_for_materialized()

    c.run("testdrive-svc", f"--seed={seed}", "disable-user-indexes/before.td")

    c.kill("materialized")

    with c.override(
            Materialized(options=f"{mz_options} --disable-user-indexes", )):
        c.up("materialized")
        c.wait_for_materialized()

        c.run("testdrive-svc", f"--seed={seed}",
              "disable-user-indexes/after.td")

        c.kill("materialized")

    c.rm("materialized", "testdrive-svc", destroy_volumes=True)

    c.rm_volumes("mzdata")
Example #8
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Run testdrive."""
    parser.add_argument(
        "--redpanda",
        action="store_true",
        help="run against Redpanda instead of the Confluent Platform",
    )
    parser.add_argument(
        "--aws-region",
        help="run against the specified AWS region instead of localstack",
    )
    parser.add_argument(
        "--workers",
        type=int,
        metavar="N",
        help="set the number of materialized dataflow workers",
    )
    parser.add_argument(
        "--persistent-user-tables",
        action="store_true",
        help="enable the --persistent-user-tables materialized option",
    )
    parser.add_argument(
        "files",
        nargs="*",
        default=["*.td", "esoteric/*.td"],
        help="run against the specified files",
    )
    args = parser.parse_args()

    if not args.redpanda and Arch.host() == Arch.AARCH64:
        ui.warn(
            "Running the Confluent Platform in Docker on ARM-based machines is "
            "nearly unusably slow. Consider using Redpanda instead (--redpanda) "
            "or running tests without mzcompose.")

    dependencies = ["materialized"]
    if args.redpanda:
        dependencies += ["redpanda"]
    else:
        dependencies += ["zookeeper", "kafka", "schema-registry"]

    materialized = Materialized(
        workers=args.workers,
        options=["--persistent-user-tables"]
        if args.persistent_user_tables else [],
    )

    testdrive = Testdrive(
        forward_buildkite_shard=True,
        entrypoint_extra=[f"--aws-region={args.aws_region}"]
        if args.aws_region else ["--aws-endpoint=http://localstack:4566"],
    )

    with c.override(materialized, testdrive):
        c.start_and_wait_for_tcp(services=dependencies)
        c.wait_for_materialized("materialized")
        c.run("testdrive-svc", *args.files)
        c.kill("materialized")
Example #9
def test_upgrade_from_version(
    c: Composition, from_version: str, priors: List[str], filter: str, style: str = ""
) -> None:
    print(f"===>>> Testing upgrade from Materialize {from_version} to current_source.")

    version_glob = "{" + ",".join(["any_version", *priors, from_version]) + "}"
    print(">>> Version glob pattern: " + version_glob)

    c.rm("materialized", "testdrive-svc", stop=True)
    c.rm_volumes("mzdata", "tmp")

    if from_version != "current_source":
        mz_from = Materialized(
            image=f"materialize/materialized:{from_version}",
            options=" ".join(
                opt
                for start_version, opt in mz_options.items()
                if from_version[1:] >= start_version
            ),
            environment=[
                "SSL_KEY_PASSWORD=mzmzmz",
            ],
            volumes_extra=["secrets:/share/secrets"],
        )
        with c.override(mz_from):
            c.up("materialized")
    else:
        c.up("materialized")

    c.wait_for_materialized("materialized")

    temp_dir = f"--temp-dir=/share/tmp/upgrade-from-{from_version}"
    seed = f"--seed={random.getrandbits(32)}"
    c.run(
        "testdrive-svc",
        "--no-reset",
        f"--var=upgrade-from-version={from_version}",
        temp_dir,
        seed,
        f"create-{style}in-{version_glob}-{filter}.td",
    )

    c.kill("materialized")
    c.rm("materialized", "testdrive-svc")

    c.up("materialized")
    c.wait_for_materialized("materialized")

    c.run(
        "testdrive-svc",
        "--no-reset",
        f"--var=upgrade-from-version={from_version}",
        temp_dir,
        seed,
        "--validate-catalog=/share/mzdata/catalog",
        f"check-{style}from-{version_glob}-{filter}.td",
    )
Example #10
def workflow_compaction(c: Composition) -> None:
    with c.override(Materialized(options=f"--metrics-scraping-interval=1s", )):
        c.up("materialized")
        c.wait_for_materialized()

        c.run("testdrive", "compaction/compaction.td")

        c.kill("materialized")

    c.rm("materialized", "testdrive", destroy_volumes=True)

    c.rm_volumes("mzdata", "pgdata")
Example #11
def test_upgrade_from_version(
    c: Composition, from_version: str, priors: List[str], filter: str
) -> None:
    print(f"===>>> Testing upgrade from Materialize {from_version} to current_source.")

    version_glob = "|".join(["any_version", *priors, from_version])
    print(">>> Version glob pattern: " + version_glob)

    if from_version != "current_source":
        mz_from = Materialized(
            image=f"materialize/materialized:{from_version}",
            options=" ".join(
                opt
                for start_version, opt in mz_options.items()
                if from_version[1:] >= start_version
            ),
        )
        with c.override(mz_from):
            c.up("materialized")
    else:
        c.up("materialized")

    c.wait_for_materialized("materialized")

    temp_dir = f"--temp-dir=/share/tmp/upgrade-from-{from_version}"
    with patch.dict(os.environ, {"UPGRADE_FROM_VERSION": from_version}):
        c.run(
            "testdrive-svc",
            "--seed=1",
            "--no-reset",
            temp_dir,
            f"create-in-@({version_glob})-{filter}.td",
        )

    c.kill("materialized")
    c.rm("materialized", "testdrive-svc")

    c.up("materialized")
    c.wait_for_materialized("materialized")

    with patch.dict(os.environ, {"UPGRADE_FROM_VERSION": from_version}):
        c.run(
            "testdrive-svc",
            "--seed=1",
            "--no-reset",
            temp_dir,
            f"--validate-catalog=/share/mzdata/catalog check-from-@({version_glob})-{filter}.td",
        )

    c.kill("materialized")
    c.rm("materialized", "testdrive-svc")
    c.rm_volumes("mzdata", "tmp")
Example #12
def workflow_test_resource_limits(c: Composition) -> None:
    """Test resource limits in Materialize."""

    c.down(destroy_volumes=True)

    with c.override(
            Testdrive(),
            Materialized(),
    ):
        c.up("materialized")
        c.wait_for_materialized()

        c.run("testdrive", "resources/resource-limits.td")
Example #13
def workflow_stash(c: Composition) -> None:
    c.rm(
        "testdrive",
        "materialized",
        stop=True,
        destroy_volumes=True,
    )
    c.rm_volumes("mzdata", "pgdata", force=True)

    materialized = Materialized(options=[
        "--adapter-stash-url", "postgres://*****:*****@postgres"
    ])
    postgres = Postgres(image="postgres:14.4")

    with c.override(materialized, postgres):
        c.up("postgres")
        c.wait_for_postgres()
        c.start_and_wait_for_tcp(services=["materialized"])
        c.wait_for_materialized("materialized")

        c.sql("CREATE TABLE a (i INT)")

        c.stop("postgres")
        c.up("postgres")
        c.wait_for_postgres()

        c.sql("CREATE TABLE b (i INT)")

        c.rm("postgres", stop=True, destroy_volumes=True)
        c.up("postgres")
        c.wait_for_postgres()

        # Postgres cleared its database, so this should fail.
        try:
            c.sql("CREATE TABLE c (i INT)")
            raise Exception("expected unreachable")
        except Exception as e:
            # Depending on timing, either of these errors can occur. The stash
            # error comes from the stash complaining; the network error comes
            # from pg8000 complaining because materialized panicked.
            if ("stash error: postgres: db error" not in str(e)
                    and "network error" not in str(e)):
                raise e
Example #14
def workflow_test_remote_storaged(c: Composition) -> None:
    """Test creating sources in a remote storaged process."""

    c.down(destroy_volumes=True)

    with c.override(
            Testdrive(default_timeout="15s",
                      no_reset=True,
                      consistent_seed=True),
            # Use a separate PostgreSQL service for persist rather than the one in
            # the `Materialized` service, so that crashing `environmentd` does not
            # also take down PostgreSQL.
            Postgres(),
            Materialized(
                options="--persist-consensus-url=postgres://postgres:postgres@postgres"
            ),
    ):
        dependencies = [
            "materialized",
            "postgres",
            "storaged",
            "redpanda",
        ]
        c.start_and_wait_for_tcp(services=dependencies)

        c.run("testdrive", "storaged/01-create-sources.td")

        c.kill("materialized")
        c.up("materialized")
        c.run("testdrive", "storaged/02-after-environmentd-restart.td")

        c.kill("storaged")
        c.run("testdrive", "storaged/03-while-storaged-down.td")

        c.up("storaged")
        c.run("testdrive", "storaged/04-after-storaged-restart.td")
Example #15
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    """Runs the dbt adapter test suite against Materialize in various configurations."""
    parser.add_argument("filter",
                        nargs="?",
                        default="",
                        help="limit to test cases matching filter")
    args = parser.parse_args()

    for test_case in test_cases:
        if args.filter in test_case.name:
            print(f"> Running test case {test_case.name}")
            materialized = Materialized(
                options=test_case.materialized_options,
                image=test_case.materialized_image,
                depends_on=["test-certs"],
                volumes_extra=["secrets:/secrets"],
            )

            with c.test_case(test_case.name):
                with c.override(materialized):
                    c.down()
                    c.start_and_wait_for_tcp(
                        services=["zookeeper", "kafka", "schema-registry"])
                    c.up("materialized")
                    c.wait_for_tcp(host="materialized", port=6875)
                    c.run(
                        "dbt-test",
                        "pytest",
                        "dbt-materialize/test",
                        env_extra={
                            "DBT_HOST": "materialized",
                            "KAFKA_ADDR": "kafka:9092",
                            "SCHEMA_REGISTRY_URL":
                            "http://schema-registry:8081",
                            **test_case.dbt_env,
                        },
                    )
Example #16
import os
import time

from materialize.mzcompose import Composition
from materialize.mzcompose.services import (
    Kafka,
    Materialized,
    SchemaRegistry,
    Testdrive,
    Zookeeper,
)

mz_options = "--persistent-user-tables --persistent-kafka-sources --disable-persistent-system-tables-test"

mz_default = Materialized(options=mz_options)

mz_logical_compaction_window_off = Materialized(
    # We need to use 1s and not 100ms here, as otherwise
    # validate_timestamp_bindings() dominates the CPU; see #10740.
    timestamp_frequency="1s",
    options=f"{mz_options} --logical-compaction-window=off",
)

# TODO: add back mz_logical_compaction_window_off in the line below.
# See: https://github.com/MaterializeInc/materialize/issues/10488
mz_configurations = [mz_default]

prerequisites = ["zookeeper", "kafka", "schema-registry"]

SERVICES = [
Example #17
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

from materialize.mzcompose import Composition
from materialize.mzcompose.services import (
    Kafka,
    Materialized,
    SchemaRegistry,
    Testdrive,
    Zookeeper,
)

versioned_mz = [
    Materialized(
        name=f"materialized_{version}",
        image=f"materialize/materialized:{version}",
        hostname="materialized",
    ) for version in ["v0.7.0", "v0.8.0"]
]
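
# The comprehension above defines materialized_v0.7.0 and materialized_v0.8.0,
# both reachable under the shared hostname "materialized".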

multiple_mz = [
    Materialized(name=f"materialized{i}",
                 data_directory=f"/share/materialized{i}",
                 port=6875 + i) for i in [1, 2]
]
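
# materialized1 gets port 6876 and materialized2 gets port 6877 (6875 + i).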

mz_with_options = [
    Materialized(name="mz_2_workers",
                 hostname="materialized",
                 options="--workers 2"),
    Materialized(name="mz_4_workers",
Example #18
         "SCHEMA_REGISTRY_AUTHENTICATION_METHOD=BASIC",
         "SCHEMA_REGISTRY_AUTHENTICATION_ROLES=user",
         "SCHEMA_REGISTRY_AUTHENTICATION_REALM=SchemaRegistry",
         "KAFKA_OPTS=-Djava.security.auth.login.config=/etc/schema-registry/sasl.jaas.config",
         "SCHEMA_REGISTRY_OPTS=-Djava.security.auth.login.config=/etc/schema-registry/sasl.jaas.config",
     ],
     volumes=[
         "secrets:/etc/schema-registry/secrets",
         "./sasl.jaas.config:/etc/schema-registry/sasl.jaas.config",
         "./users.properties:/etc/schema-registry/users.properties",
     ],
     bootstrap_server_type="SASL_SSL",
 ),
 Materialized(
     environment_extra=[
         "SASL_PASSWORD=sekurity",
     ],
     volumes_extra=["secrets:/share/secrets"],
 ),
 Testdrive(
     entrypoint=[
         "bash",
         "-c",
         "cp /share/secrets/ca.crt /usr/local/share/ca-certificates/ca.crt && "
         "update-ca-certificates && "
         "testdrive "
         "--kafka-addr=kafka:9092 "
         "--kafka-option=security.protocol=SASL_SSL "
         "--kafka-option=sasl.mechanism=PLAIN "
         "--kafka-option=sasl.username=materialize "
         "--kafka-option=sasl.password=sekurity "
         "--schema-registry-url=https://materialize:sekurity@schema-registry:8081 "
Example #19

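# A relative-threshold comparison: presumably a scenario is flagged when its
# relative performance difference exceeds 10% (threshold=0.10); the exact
# semantics live in RelativeThresholdComparator, defined elsewhere.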
def make_comparator(name: str) -> Comparator:
    return RelativeThresholdComparator(name, threshold=0.10)


default_timeout = "5m"

SERVICES = [
    Zookeeper(),
    Kafka(),
    SchemaRegistry(),
    # We are going to override this service definition during the actual
    # benchmark. We put "latest" here so that we avoid recompiling the current
    # source unless we will actually be benchmarking it.
    Materialized(image="latest"),
    Testdrive(
        validate_catalog=False,
        default_timeout=default_timeout,
    ),
]


def run_one_scenario(
    c: Composition, scenario: Type[Scenario], args: argparse.Namespace
) -> Comparator:
    name = scenario.__name__
    print(f"Now benchmarking {name} ...")
    comparator = make_comparator(name)
    common_seed = round(time.time())
Example #20
        "name":
        "no_proxy",
        "env": [
            "ALL_PROXY=http://localhost:1234",
            "NO_PROXY=schema-registry,amazonaws.com,localstack",
        ],
        "td":
        "testdrive/avro-registry.td testdrive/esoteric/s3.td",
    },
]

# Construct a dedicated Mz instance for each set of env variables under test
for t in tests:
    t["mz"] = Materialized(
        name=f"materialized_{t['name']}",
        hostname="materialized",
        environment_extra=t["env"],
    )
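# e.g. the loop above defines a service named "materialized_no_proxy" for the
# test case shown.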

mzs = [t["mz"] for t in tests]

SERVICES = [
    Zookeeper(),
    Kafka(),
    SchemaRegistry(),
    Squid(),
    Localstack(),
    *mzs,
    Testdrive(volumes_extra=["../testdrive:/workdir/testdrive"]),
]
Example #21
# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

from materialize.mzcompose import Composition, WorkflowArgumentParser
from materialize.mzcompose.services import Materialized, Postgres, TestCerts, Testdrive

SERVICES = [
    Materialized(volumes_extra=["secrets:/share/secrets"]),
    Testdrive(volumes_extra=["secrets:/share/secrets"]),
    TestCerts(),
    Postgres(),
]


def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    parser.add_argument(
        "filter",
        nargs="*",
        default=["*.td"],
        help="limit to only the files matching filter",
    )
    args = parser.parse_args()

    c.up("materialized", "test-certs", "testdrive-svc", "postgres")
    c.wait_for_materialized()
Example #22
    @classmethod
    def body(cls) -> None:
        print(
            f"> CREATE MATERIALIZED VIEW v1 AS SELECT generate_series AS f1, generate_series AS f2 FROM (SELECT * FROM generate_series(1, {cls.COUNT}));"
        )
        print("> SELECT COUNT(*) FROM v1 AS a1 LEFT JOIN v1 AS a2 USING (f1);")
        print(cls.COUNT)


SERVICES = [
    Zookeeper(),
    Kafka(),
    SchemaRegistry(),
    Materialized(
        memory="8G",
        options="--persistent-user-tables --persistent-kafka-sources"),
    Testdrive(),
]


def workflow_default(c: Composition) -> None:
    c.start_and_wait_for_tcp(
        services=["zookeeper", "kafka", "schema-registry", "materialized"])

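    # Each Generator subclass prints a testdrive script to stdout; capture it
    # in a temporary file inside the composition directory and run it.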
    with tempfile.NamedTemporaryFile(mode="w", dir=c.path) as tmp:
        with contextlib.redirect_stdout(tmp):
            [cls.generate() for cls in Generator.__subclasses__()]
            sys.stdout.flush()
            c.run("testdrive-svc", os.path.basename(tmp.name))
Example #23
    @classmethod
    def body(cls) -> None:
        print(
            f"> CREATE MATERIALIZED VIEW v1 AS SELECT generate_series AS f1, generate_series AS f2 FROM (SELECT * FROM generate_series(1, {cls.COUNT}));"
        )
        print("> SELECT COUNT(*) FROM v1 AS a1 LEFT JOIN v1 AS a2 USING (f1);")
        print(cls.COUNT)


SERVICES = [
    Zookeeper(),
    Kafka(),
    SchemaRegistry(),
    Materialized(memory="8G"),
    Testdrive(default_timeout="60s"),
]


def run_test(c: Composition, args: argparse.Namespace) -> None:
    c.up("testdrive", persistent=True)

    scenarios = ([globals()[args.scenario]]
                 if args.scenario else Generator.__subclasses__())

    for scenario in scenarios:
        with tempfile.NamedTemporaryFile(mode="w", dir=c.path) as tmp:
            with contextlib.redirect_stdout(tmp):
                scenario.generate()
                sys.stdout.flush()
Example #24
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    parser.add_argument(
        "--num-seconds",
        type=int,
        default=100,
        help="number of seconds to write records to Kafka",
    )
    parser.add_argument(
        "--records-per-second",
        type=int,
        default=10000,
        help="throughput of writes to maintain during testing",
    )
    parser.add_argument("--num-keys",
                        type=int,
                        default=1000000000,
                        help="number of distinct keys")
    parser.add_argument("--value-bytes",
                        type=int,
                        default=500,
                        help="record payload size in bytes")
    parser.add_argument(
        "--upsert",
        action="store_true",
        help="whether to use envelope UPSERT (True) or NONE (False)",
    )
    parser.add_argument("--timeout-secs",
                        type=int,
                        default=120,
                        help="timeout to send records to Kafka")
    parser.add_argument(
        "--enable-persistence",
        action="store_true",
        help="whether or not to enable persistence on materialized",
    )
    parser.add_argument(
        "--s3-storage",
        type=str,
        default=None,
        help="enables s3 persist storage, pointed at the given subpath of our internal testing bucket",
    )
    parser.add_argument(
        "--workers",
        type=int,
        default=None,
        help="number of dataflow workers to use in materialized",
    )
    args = parser.parse_args()

    envelope = "NONE"
    if args.upsert:
        envelope = "UPSERT"

    options = []
    if args.enable_persistence:
        options = [
            "--persistent-user-tables",
            "--persistent-kafka-sources",
            "--disable-persistent-system-tables-test",
        ]

    if args.s3_storage == "":
        print("--s3-storage value must be non-empty", file=sys.stderr)
        sys.exit(1)
    elif args.s3_storage:
        if not args.enable_persistence:
            print(
                "cannot specify --s3-storage without --enable-persistence",
                file=sys.stderr,
            )
            sys.exit(1)
        options.extend([
            "--persist-storage-enabled",
            f"--persist-storage=s3://mtlz-test-persist-1d-lifecycle-delete/{args.s3_storage}",
        ])

    override = [
        Materialized(
            workers=args.workers,
            timestamp_frequency="1s",
            options=options,
        )
    ]

    with c.override(*override):
        c.start_and_wait_for_tcp(services=prerequisites)

        c.up("materialized")
        c.wait_for_materialized("materialized")

        c.run(
            "testdrive-svc",
            f"--var=envelope={envelope}",
            "setup.td",
        )

        start = time.monotonic()
        records_sent = 0
        total_records_to_send = args.records_per_second * args.num_seconds
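        # With the defaults above this is 10,000 records/s * 100 s = 1,000,000 records.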
        # Maximum observed delta between records sent by the benchmark and ingested by
        # Materialize.
        max_lag = 0
        last_reported_time = 0.0

        while True:
            elapsed = time.monotonic() - start
            records_ingested = query_materialize(c)

            lag = records_sent - records_ingested

            if lag > max_lag:
                max_lag = lag

            # Report our findings back once per second.
            if elapsed - last_reported_time > 1:
                print(
                    f"C> after {elapsed:.3f}s sent {records_sent} records, and ingested {records_ingested}. max observed lag {max_lag} records, most recent lag {lag} records"
                )
                last_reported_time = elapsed

            # Determine how many records we are scheduled to send, based on how long
            # the benchmark has been running and the desired QPS.
            records_scheduled = int(
                min(elapsed, args.num_seconds) * args.records_per_second)
            records_to_send = records_scheduled - records_sent

            if records_to_send > 0:
                send_records(
                    c,
                    num_records=records_to_send,
                    num_keys=args.num_keys,
                    value_bytes=args.value_bytes,
                    timeout_secs=args.timeout_secs,
                )
                records_sent = records_scheduled

            # Exit once we've sent all the records we need to send, and confirmed that
            # Materialize has ingested them.
            if records_sent == total_records_to_send == records_ingested:
                print(
                    f"C> Finished after {elapsed:.3f}s sent and ingested {records_sent} records. max observed lag {max_lag} records."
                )
                break
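
# query_materialize and send_records are helpers defined elsewhere in this
# file. A minimal sketch of query_materialize, assuming setup.td created a
# view counting ingested records (the view name and the use of
# Composition.sql_query are assumptions):
#
#   def query_materialize(c: Composition) -> int:
#       # Return the number of records Materialize has ingested so far.
#       return int(c.sql_query("SELECT count(*) FROM ingest_progress")[0][0])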
Example #25
    Testdrive,
    Zookeeper,
)

# All released Materialize versions, in order from most to least recent.
all_versions = util.known_materialize_versions()

# The `materialized` options that are valid only at or above a certain version.
mz_options = {Version.parse("0.9.2"): "--persistent-user-tables"}

SERVICES = [
    Zookeeper(),
    Kafka(),
    SchemaRegistry(),
    Postgres(),
    Materialized(options=" ".join(mz_options.values())),
    # N.B.: we need to use `validate_catalog=False` because testdrive uses HEAD
    # to load the catalog from disk but does *not* run migrations. There is no
    # guarantee that HEAD can load an old catalog without running migrations.
    #
    # When testdrive is targeting a HEAD materialized, we re-enable catalog
    # validation below by manually passing the `--validate-catalog` flag.
    #
    # Disabling catalog validation is preferable to using a versioned testdrive
    # because that would involve maintaining backwards compatibility for all
    # testdrive commands.
    Testdrive(validate_catalog=False),
]


def workflow_upgrade(c: Composition, parser: WorkflowArgumentParser) -> None:
Example #26
def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
    parser.add_argument(
        "--num-seconds",
        type=int,
        default=100,
        help="number of seconds to write records to Kafka",
    )
    parser.add_argument(
        "--records-per-second",
        type=int,
        default=10000,
        help="throughput of writes to maintain during testing",
    )
    parser.add_argument("--num-keys",
                        type=int,
                        default=1000000000,
                        help="number of distinct keys")
    parser.add_argument("--value-bytes",
                        type=int,
                        default=500,
                        help="record payload size in bytes")
    parser.add_argument("--timeout-secs",
                        type=int,
                        default=120,
                        help="timeout to send records to Kafka")
    parser.add_argument(
        "--blob-url",
        type=str,
        default=None,
        help="location where we store persistent data",
    )
    parser.add_argument(
        "--consensus-url",
        type=str,
        default=None,
        help="location where we store persistent data",
    )
    args = parser.parse_args()

    options = [
        "--persist-consensus-url",
        f"{args.consensus_url}",
        "--persist-blob-url",
        f"{args.blob_url}",
    ]

    override = [Materialized(options=options)]

    with c.override(*override):
        c.start_and_wait_for_tcp(services=prerequisites)

        c.up("materialized")
        c.wait_for_materialized("materialized")

        c.run(
            "testdrive",
            "setup.td",
        )

        start = time.monotonic()
        records_sent = 0
        total_records_to_send = args.records_per_second * args.num_seconds
        # Maximum observed delta between records sent by the benchmark and ingested by
        # Materialize.
        max_lag = 0
        last_reported_time = 0.0

        while True:
            elapsed = time.monotonic() - start
            records_ingested = query_materialize(c)

            lag = records_sent - records_ingested

            if lag > max_lag:
                max_lag = lag

            # Report our findings back once per second.
            if elapsed - last_reported_time > 1:
                print(
                    f"C> after {elapsed:.3f}s sent {records_sent} records, and ingested {records_ingested}. max observed lag {max_lag} records, most recent lag {lag} records"
                )
                last_reported_time = elapsed

            # Determine how many records we are scheduled to send, based on how long
            # the benchmark has been running and the desired QPS.
            records_scheduled = int(
                min(elapsed, args.num_seconds) * args.records_per_second)
            records_to_send = records_scheduled - records_sent

            if records_to_send > 0:
                send_records(
                    c,
                    num_records=records_to_send,
                    num_keys=args.num_keys,
                    value_bytes=args.value_bytes,
                    timeout_secs=args.timeout_secs,
                )
                records_sent = records_scheduled

            # Exit once we've sent all the records we need to send, and confirmed that
            # Materialize has ingested them.
            if records_sent == total_records_to_send == records_ingested:
                print(
                    f"C> Finished after {elapsed:.3f}s sent and ingested {records_sent} records. max observed lag {max_lag} records."
                )
                break
Example #27
# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

from materialize.mzcompose import Composition
from materialize.mzcompose.services import Materialized, Postgres, Testdrive, Toxiproxy

SERVICES = [
    Materialized(),
    Postgres(),
    Toxiproxy(),
    Testdrive(no_reset=True, default_timeout="60s"),
]


def workflow_pg_cdc_resumption(c: Composition) -> None:
    """Test Postgres direct replication's failure handling by
    disrupting replication at various stages using Toxiproxy or service restarts
    """

    initialize(c)

    for scenario in [
            disconnect_pg_during_snapshot,
            disconnect_pg_during_replication,
            restart_pg_during_snapshot,
Example #28
)
from materialize.mzcompose.services import Testdrive as TestdriveService

SERVICES = [
    Postgres(name="postgres-backend"),
    Postgres(name="postgres-source"),
    Redpanda(auto_create_topics=True),
    Debezium(),
    Computed(
        name="computed_1"
    ),  # Started by some Scenarios, defined here only for the teardown
    Materialized(
        options=" ".join(
            [
                "--persist-consensus-url=postgresql://postgres:postgres@postgres-backend:5432?options=--search_path=consensus",
                "--storage-stash-url=postgresql://postgres:postgres@postgres-backend:5432?options=--search_path=storage",
                "--adapter-stash-url=postgresql://postgres:postgres@postgres-backend:5432?options=--search_path=adapter",
            ]
        )
    ),
    TestdriveService(default_timeout="300s", no_reset=True, seed=1),
]


class ExecutionMode(Enum):
    ALLTOGETHER = "alltogether"
    ONEATATIME = "oneatatime"

    def __str__(self) -> str:
        return self.value
Example #29
# == Services ==

# In buildkite this script is executed in a read-only directory so we can't use
# cwd. Directories in `/tmp` end up empty inside of the running docker
# container, perhaps because of docker-in-docker and external permissions.
#
# This works in both local dev and buildkite.
LOCAL_DIR = (Path(__file__).parent / f"mzcompose-aws-config-{SEED}").resolve()

AWS_VOLUME = [f"{LOCAL_DIR}:/root/.aws"]

SERVICES = [
    Materialized(
        forward_aws_credentials=False,
        environment_extra=[f"AWS_EC2_METADATA_SERVICE_ENDPOINT={DISCARD}"],
        volumes_extra=AWS_VOLUME,
    ),
    Testdrive(
        materialize_url=f"postgres://materialize@materialized:6875",
        seed=SEED,
    ),
]

# Service overrides for specifying external id

MZ_EID = Materialized(
    forward_aws_credentials=False,
    options=f"--aws-external-id={EXTERNAL_ID}",
    environment_extra=[f"AWS_EC2_METADATA_SERVICE_ENDPOINT={DISCARD}"],
    volumes_extra=AWS_VOLUME,
Example #30
         "SCHEMA_REGISTRY_SSL_CLIENT_AUTH=true",
         "SCHEMA_REGISTRY_AUTHENTICATION_METHOD=BASIC",
         "SCHEMA_REGISTRY_AUTHENTICATION_ROLES=user",
         "SCHEMA_REGISTRY_AUTHENTICATION_REALM=SchemaRegistry",
         "SCHEMA_REGISTRY_OPTS=-Djava.security.auth.login.config=/etc/schema-registry/jaas_config.conf",
     ],
     volumes=[
         "secrets:/etc/schema-registry/secrets",
         "./jaas_config.conf:/etc/schema-registry/jaas_config.conf",
         "./users.properties:/etc/schema-registry/users.properties",
     ],
     bootstrap_server_type="SSL",
 ),
 Materialized(
     environment=[
         "SSL_KEY_PASSWORD=mzmzmz",
     ],
     volumes_extra=["secrets:/share/secrets"],
 ),
 Testdrive(
     entrypoint=[
         "bash",
         "-c",
         "cp /share/secrets/ca.crt /usr/local/share/ca-certificates/ca.crt && "
         "update-ca-certificates && "
         "testdrive "
         "--kafka-addr=kafka:9092 "
         "--schema-registry-url=https://schema-registry:8081 "
         "--materialized-url=postgres://materialize@materialized:6875 "
         "--cert=/share/secrets/producer.p12 "
         "--cert-password=mzmzmz "
         "--ccsr-password=sekurity "