Example #1
def helm_namespace(
    cluster_provider, request
):  # pylint: disable=unused-argument, redefined-outer-name
    '''If an existing Helm chart namespace is specified via the pytest CLI argument
    --existing-helm-namespace, we will use that namespace.

    Otherwise, provision a test namespace and install Helm chart into that namespace.

    Yields the Helm chart namespace.
    '''

    existing_helm_namespace = request.config.getoption('--existing-helm-namespace')

    if existing_helm_namespace:
        yield existing_helm_namespace

    else:
        # Never bother cleaning up on Buildkite
        if IS_BUILDKITE:
            should_cleanup = False
        # Otherwise, always clean up unless --no-cleanup specified
        else:
            should_cleanup = not request.config.getoption('--no-cleanup')

        with test_namespace(should_cleanup) as namespace:
            with helm_test_resources(namespace, should_cleanup):
                docker_image = test_project_docker_image()
                with helm_chart(namespace, docker_image, should_cleanup):
                    print('Helm chart successfully installed in namespace %s' % namespace)
                    yield namespace
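
A minimal consumption sketch, assuming the function above is registered as a pytest fixture (its yield and its request argument suggest this). The test name and body are hypothetical:

def test_against_helm_namespace(helm_namespace):
    # pytest injects the namespace string yielded by the fixture above;
    # namespace setup and teardown wrap this test body. Hypothetical test.
    print('Running against Helm namespace: %s' % helm_namespace)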
Example #2
def test_valid_job_format(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(
        os.path.join(test_project_environments_path(), 'env.yaml'))
    pipeline_name = 'demo_pipeline'
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    job_name = 'dagster-run-%s' % run.run_id
    pod_name = 'dagster-run-%s' % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=['dagster-graphql'],
        args=[
            '-p',
            'executeRunInProcess',
            '-v',
            seven.json.dumps({'runId': run.run_id}),
        ],
        job_name=job_name,
        pod_name=pod_name,
        component='run_coordinator',
    )

    assert (yaml.dump(
        remove_none_recursively(job.to_dict()),
        default_flow_style=False).strip() == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources='',
        ).strip())
Example #3
def test_valid_job_format(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(
        os.path.join(test_project_environments_path(), "env.yaml"))
    pipeline_name = "demo_pipeline"
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    job_name = "dagster-run-%s" % run.run_id
    pod_name = "dagster-run-%s" % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=["dagster"],
        args=["api", "execute_run_with_structured_logs"],
        job_name=job_name,
        pod_name=pod_name,
        component="run_coordinator",
    )

    assert (yaml.dump(
        remove_none_recursively(job.to_dict()),
        default_flow_style=False).strip() == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources="",
        ).strip())
Example #4
def test_valid_job_format(run_launcher):
    docker_image = test_project_docker_image()

    environment_dict = load_yaml_from_path(
        os.path.join(test_project_environments_path(), 'env.yaml'))
    pipeline_name = 'demo_pipeline'
    run = PipelineRun(pipeline_name=pipeline_name,
                      environment_dict=environment_dict)

    job_name = 'dagster-run-%s' % run.run_id
    pod_name = 'dagster-run-%s' % run.run_id
    job = construct_dagster_graphql_k8s_job(
        run_launcher.job_config,
        args=[
            '-p',
            'startPipelineExecutionForCreatedRun',
            '-v',
            seven.json.dumps({'runId': run.run_id}),
        ],
        job_name=job_name,
        pod_name=pod_name,
        component='runmaster',
    )

    assert (yaml.dump(
        remove_none_recursively(job.to_dict()),
        default_flow_style=False).strip() == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
        ).strip())
Example #5
def _helm_namespace_helper(helm_chart_fn, request):
    """If an existing Helm chart namespace is specified via pytest CLI with the argument
    --existing-helm-namespace, we will use that chart.

    Otherwise, provision a test namespace and install Helm chart into that namespace.

    Yields the Helm chart namespace.
    """
    existing_helm_namespace = request.config.getoption(
        "--existing-helm-namespace")

    if existing_helm_namespace:
        yield existing_helm_namespace

    else:
        # Never bother cleaning up on Buildkite
        if IS_BUILDKITE:
            should_cleanup = False
        # Otherwise, always clean up unless --no-cleanup specified
        else:
            should_cleanup = not request.config.getoption("--no-cleanup")

        with test_namespace(should_cleanup) as namespace:
            with helm_test_resources(namespace, should_cleanup):
                docker_image = test_project_docker_image()
                with helm_chart_fn(namespace, docker_image, should_cleanup):
                    print("Helm chart successfully installed in namespace %s" %
                          namespace)
                    yield namespace
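
A sketch of how a concrete fixture might delegate to this helper. The wrapper below is hypothetical and assumes a chart-install context manager such as the helm_chart used in Example 1:

def helm_namespace(cluster_provider, request):  # hypothetical wrapper
    # Forward to the shared helper, choosing which chart-install
    # context manager to apply.
    yield from _helm_namespace_helper(helm_chart, request)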
Example #6
def test_execute_celery_docker():
    docker_image = test_project_docker_image()
    docker_config = {
        "image": docker_image,
        "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
    }

    if IS_BUILDKITE:
        ecr_client = boto3.client("ecr", region_name="us-west-1")
        token = ecr_client.get_authorization_token()
        username, password = (base64.b64decode(
            token["authorizationData"][0]
            ["authorizationToken"]).decode().split(":"))
        registry = token["authorizationData"][0]["proxyEndpoint"]

        docker_config["registry"] = {
            "url": registry,
            "username": username,
            "password": password,
        }

    else:
        try:
            client = docker.from_env()
            client.images.get(docker_image)
            print(  # pylint: disable=print-call
                "Found existing image tagged {image}, skipping image build. To rebuild, first run: "
                "docker rmi {image}".format(image=docker_image))
        except docker.errors.ImageNotFound:
            build_and_tag_test_image(docker_image)

    with seven.TemporaryDirectory() as temp_dir:
        run_config = merge_dicts(
            merge_yamls([
                os.path.join(test_project_environments_path(), "env.yaml"),
                os.path.join(test_project_environments_path(), "env_s3.yaml"),
            ]),
            {
                "execution": {
                    "celery-docker": {
                        "config": {
                            "docker": docker_config,
                            "config_source": {
                                "task_always_eager": True
                            },
                        }
                    }
                },
            },
        )

        result = execute_pipeline(
            get_test_project_recon_pipeline("docker_celery_pipeline"),
            run_config=run_config,
            instance=DagsterInstance.local_temp(temp_dir),
        )
        assert result.success
Example #7
def test_execute_celery_docker():
    docker_image = test_project_docker_image()
    docker_config = {
        'image': docker_image,
        'env_vars': ['AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'],
    }

    if IS_BUILDKITE:
        ecr_client = boto3.client('ecr', region_name='us-west-1')
        token = ecr_client.get_authorization_token()
        username, password = (base64.b64decode(
            token['authorizationData'][0]
            ['authorizationToken']).decode().split(':'))
        registry = token['authorizationData'][0]['proxyEndpoint']

        docker_config['registry'] = {
            'url': registry,
            'username': username,
            'password': password,
        }

    else:
        try:
            client = docker.from_env()
            client.images.get(docker_image)
            print(  # pylint: disable=print-call
                'Found existing image tagged {image}, skipping image build. To rebuild, first run: '
                'docker rmi {image}'.format(image=docker_image))
        except docker.errors.ImageNotFound:
            build_and_tag_test_image(docker_image)

    with seven.TemporaryDirectory() as temp_dir:
        run_config = merge_dicts(
            merge_yamls([
                os.path.join(test_project_environments_path(), 'env.yaml'),
                os.path.join(test_project_environments_path(), 'env_s3.yaml'),
            ]),
            {
                'execution': {
                    'celery-docker': {
                        'config': {
                            'docker': docker_config,
                            'config_source': {
                                'task_always_eager': True
                            },
                        }
                    }
                },
            },
        )

        result = execute_pipeline(
            get_test_project_recon_pipeline('docker_celery_pipeline'),
            run_config=run_config,
            instance=DagsterInstance.local_temp(temp_dir),
        )
        assert result.success
Example #8
def test_valid_job_format_with_backcompat_resources(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(
        os.path.join(test_project_environments_path(), "env.yaml"))
    pipeline_name = "demo_pipeline"
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    tags = validate_tags({
        K8S_RESOURCE_REQUIREMENTS_KEY: ({
            "requests": {
                "cpu": "250m",
                "memory": "64Mi"
            },
            "limits": {
                "cpu": "500m",
                "memory": "2560Mi"
            },
        })
    })
    user_defined_k8s_config = get_user_defined_k8s_config(tags)
    job_name = "dagster-run-%s" % run.run_id
    pod_name = "dagster-run-%s" % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=["dagster-graphql"],
        args=[
            "-p",
            "executeRunInProcess",
            "-v",
            seven.json.dumps({"runId": run.run_id}),
        ],
        job_name=job_name,
        user_defined_k8s_config=user_defined_k8s_config,
        pod_name=pod_name,
        component="run_coordinator",
    )

    assert (yaml.dump(
        remove_none_recursively(job.to_dict()),
        default_flow_style=False).strip() == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources="""
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi""",
        ).strip())
Example #9
def test_valid_job_format_with_resources(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(
        os.path.join(test_project_environments_path(), 'env.yaml'))
    pipeline_name = 'demo_pipeline'
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    tags = validate_tags({
        K8S_RESOURCE_REQUIREMENTS_KEY: ({
            'requests': {
                'cpu': '250m',
                'memory': '64Mi'
            },
            'limits': {
                'cpu': '500m',
                'memory': '2560Mi'
            },
        })
    })
    resources = get_k8s_resource_requirements(tags)
    job_name = 'dagster-run-%s' % run.run_id
    pod_name = 'dagster-run-%s' % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=['dagster-graphql'],
        args=[
            '-p',
            'executeRunInProcess',
            '-v',
            seven.json.dumps({'runId': run.run_id}),
        ],
        job_name=job_name,
        resources=resources,
        pod_name=pod_name,
        component='runmaster',
    )

    assert (yaml.dump(
        remove_none_recursively(job.to_dict()),
        default_flow_style=False).strip() == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources='''
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi''',
        ).strip())
Example #10
    def _cluster_provider(request):
        from .kind import kind_cluster_exists, kind_cluster, kind_load_images

        if IS_BUILDKITE:
            print("Installing ECR credentials...")
            check_output(
                "aws ecr get-login --no-include-email --region us-west-1 | sh",
                shell=True)

        provider = request.config.getoption("--cluster-provider")

        # Use a kind cluster
        if provider == "kind":
            cluster_name = request.config.getoption("--kind-cluster")

            # Cluster will be deleted afterwards unless this is set.
            # This is to allow users to reuse an existing cluster in local test by running
            # `pytest --kind-cluster my-cluster --no-cleanup` -- this avoids the per-test run
            # overhead of cluster setup and teardown
            should_cleanup = True if IS_BUILDKITE else not request.config.getoption(
                "--no-cleanup")

            existing_cluster = kind_cluster_exists(cluster_name)

            with kind_cluster(cluster_name,
                              should_cleanup=should_cleanup) as cluster_config:
                if not IS_BUILDKITE and not existing_cluster:
                    docker_image = test_project_docker_image()
                    try:
                        client = docker.from_env()
                        client.images.get(docker_image)
                        print(  # pylint: disable=print-call
                            "Found existing image tagged {image}, skipping image build. To rebuild, first run: "
                            "docker rmi {image}".format(image=docker_image))
                    except docker.errors.ImageNotFound:
                        build_and_tag_test_image(docker_image)
                    kind_load_images(
                        cluster_name=cluster_config.name,
                        local_dagster_test_image=docker_image,
                        additional_images=additional_kind_images,
                    )
                yield cluster_config

        # Use cluster from kubeconfig
        elif provider == "kubeconfig":
            kubeconfig_file = os.getenv(
                "KUBECONFIG", os.path.expandvars("${HOME}/.kube/config"))
            kubernetes.config.load_kube_config(config_file=kubeconfig_file)
            yield ClusterConfig(name="from_system_kubeconfig",
                                kubeconfig_file=kubeconfig_file)

        else:
            raise Exception("unknown cluster provider %s" % provider)
Example #11
def dagster_docker_image():
    docker_image = test_project_docker_image()

    if not IS_BUILDKITE:
        try:
            client = docker.from_env()
            client.images.get(docker_image)
            print(  # pylint: disable=print-call
                "Found existing image tagged {image}, skipping image build. To rebuild, first run: "
                "docker rmi {image}".format(image=docker_image))
        except docker.errors.ImageNotFound:
            build_and_tag_test_image(docker_image)

    return docker_image
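
A hedged usage sketch, assuming dagster_docker_image is exposed as a pytest fixture. The test below is hypothetical:

def test_uses_image(dagster_docker_image):
    # Outside Buildkite, the fixture builds the image on demand if it is
    # missing locally; on Buildkite the image is assumed to already exist.
    assert dagster_docker_image == test_project_docker_image()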
Example #12
    def _cluster_provider(request):
        from .kind import kind_cluster_exists, kind_cluster, kind_load_images

        if IS_BUILDKITE:
            print('Installing ECR credentials...')
            check_output(
                'aws ecr get-login --no-include-email --region us-west-1 | sh',
                shell=True)

        provider = request.config.getoption('--cluster-provider')

        # Use a kind cluster
        if provider == 'kind':
            cluster_name = request.config.getoption('--kind-cluster')

            # Cluster will be deleted afterwards unless this is set.
            # This is to allow users to reuse an existing cluster in local test by running
            # `pytest --kind-cluster my-cluster --no-cleanup` -- this avoids the per-test run
            # overhead of cluster setup and teardown
            should_cleanup = True if IS_BUILDKITE else not request.config.getoption(
                '--no-cleanup')

            existing_cluster = kind_cluster_exists(cluster_name)

            with kind_cluster(cluster_name,
                              should_cleanup=should_cleanup) as cluster_config:
                if not IS_BUILDKITE and not existing_cluster:
                    docker_image = test_project_docker_image()
                    build_and_tag_test_image(docker_image)
                    kind_load_images(
                        cluster_name=cluster_config.name,
                        local_dagster_test_image=docker_image,
                        additional_images=additional_kind_images,
                    )
                yield cluster_config

        # Use cluster from kubeconfig
        elif provider == 'kubeconfig':
            kubeconfig_file = os.getenv(
                'KUBECONFIG', os.path.expandvars('${HOME}/.kube/config'))
            kubernetes.config.load_kube_config(config_file=kubeconfig_file)
            yield ClusterConfig(name='from_system_kubeconfig',
                                kubeconfig_file=kubeconfig_file)

        else:
            raise Exception('unknown cluster provider %s' % provider)
Example #13
def k8s_scheduler(cluster_provider, helm_namespace):  # pylint: disable=redefined-outer-name,unused-argument
    return K8sScheduler(
        scheduler_namespace=helm_namespace,
        image_pull_secrets=[{
            "name": "element-dev-key"
        }],
        service_account_name="dagit-admin",
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        job_image=test_project_docker_image(),
        load_incluster_config=False,
        kubeconfig_file=cluster_provider.kubeconfig_file,
        image_pull_policy=image_pull_policy(),
        env_config_maps=["dagster-pipeline-env", "test-env-configmap"],
        env_secrets=["test-env-secret"],
    )
Example #14
def run_launcher(cluster_provider, helm_namespace):  # pylint: disable=redefined-outer-name,unused-argument
    return K8sRunLauncher(
        image_pull_secrets=[{
            'name': 'element-dev-key'
        }],
        service_account_name='dagit-admin',
        instance_config_map='dagster-instance',
        postgres_password_secret='dagster-postgresql-secret',
        dagster_home='/opt/dagster/dagster_home',
        job_image=test_project_docker_image(),
        load_incluster_config=False,
        kubeconfig_file=cluster_provider.kubeconfig_file,
        image_pull_policy=image_pull_policy(),
        job_namespace=helm_namespace,
        env_config_maps=['dagster-pipeline-env', 'test-env-configmap'],
        env_secrets=['test-env-secret'],
    )
Example #15
def test_valid_job_format_with_user_defined_k8s_config(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(
        os.path.join(test_project_environments_path(), "env.yaml"))
    pipeline_name = "demo_pipeline"
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    tags = validate_tags({
        USER_DEFINED_K8S_CONFIG_KEY: ({
            "container_config": {
                "resources": {
                    "requests": {
                        "cpu": "250m",
                        "memory": "64Mi"
                    },
                    "limits": {
                        "cpu": "500m",
                        "memory": "2560Mi"
                    },
                }
            },
            "pod_template_spec_metadata": {
                "annotations": {
                    "cluster-autoscaler.kubernetes.io/safe-to-evict": "true"
                },
                "labels": {
                    "spotinst.io/restrict-scale-down": "true"
                },
            },
            "pod_spec_config": {
                "affinity": {
                    "nodeAffinity": {
                        "requiredDuringSchedulingIgnoredDuringExecution": {
                            "nodeSelectorTerms": [{
                                "matchExpressions": [{
                                    "key":
                                    "kubernetes.io/e2e-az-name",
                                    "operator":
                                    "In",
                                    "values": ["e2e-az1", "e2e-az2"],
                                }]
                            }]
                        }
                    }
                }
            },
        })
    })
    user_defined_k8s_config = get_user_defined_k8s_config(tags)
    job_name = "dagster-run-%s" % run.run_id
    pod_name = "dagster-run-%s" % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=["dagster"],
        args=["api", "execute_run_with_structured_logs"],
        job_name=job_name,
        user_defined_k8s_config=user_defined_k8s_config,
        pod_name=pod_name,
        component="run_coordinator",
    )

    assert (yaml.dump(remove_none_recursively(job.to_dict()),
                      default_flow_style=False).strip() ==
            EXPECTED_CONFIGURED_JOB_SPEC.format(
                run_id=run.run_id,
                job_image=docker_image,
                image_pull_policy=image_pull_policy(),
                dagster_version=dagster_version,
                labels="spotinst.io/restrict-scale-down: 'true'",
                resources="""
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi""",
                annotations="""annotations:
        cluster-autoscaler.kubernetes.io/safe-to-evict: \'true\'""",
                affinity="""affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/e2e-az-name
                operator: In
                values:
                - e2e-az1
                - e2e-az2""",
            ).strip())
Example #16
def test_valid_job_format_with_user_defined_k8s_config(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(
        os.path.join(test_project_environments_path(), 'env.yaml'))
    pipeline_name = 'demo_pipeline'
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    tags = validate_tags({
        USER_DEFINED_K8S_CONFIG_KEY: ({
            'container_config': {
                'resources': {
                    'requests': {
                        'cpu': '250m',
                        'memory': '64Mi'
                    },
                    'limits': {
                        'cpu': '500m',
                        'memory': '2560Mi'
                    },
                }
            },
            'pod_template_spec_metadata': {
                'annotations': {
                    "cluster-autoscaler.kubernetes.io/safe-to-evict": "true"
                }
            },
            'pod_spec_config': {
                'affinity': {
                    'nodeAffinity': {
                        'requiredDuringSchedulingIgnoredDuringExecution': {
                            'nodeSelectorTerms': [{
                                'matchExpressions': [{
                                    'key': 'kubernetes.io/e2e-az-name',
                                    'operator': 'In',
                                    'values': ['e2e-az1', 'e2e-az2'],
                                }]
                            }]
                        }
                    }
                }
            },
        })
    })
    user_defined_k8s_config = get_user_defined_k8s_config(tags)
    job_name = 'dagster-run-%s' % run.run_id
    pod_name = 'dagster-run-%s' % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=['dagster-graphql'],
        args=[
            '-p',
            'executeRunInProcess',
            '-v',
            seven.json.dumps({'runId': run.run_id}),
        ],
        job_name=job_name,
        user_defined_k8s_config=user_defined_k8s_config,
        pod_name=pod_name,
        component='run_coordinator',
    )

    assert (yaml.dump(remove_none_recursively(job.to_dict()),
                      default_flow_style=False).strip() ==
            EXPECTED_CONFIGURED_JOB_SPEC.format(
                run_id=run.run_id,
                job_image=docker_image,
                image_pull_policy=image_pull_policy(),
                dagster_version=dagster_version,
                resources='''
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi''',
                annotations='''annotations:
        cluster-autoscaler.kubernetes.io/safe-to-evict: \'true\'''',
                affinity='''affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/e2e-az-name
                operator: In
                values:
                - e2e-az1
                - e2e-az2''',
            ).strip())