Example #1
def test_execute_execute_plan_mutation_raw():
    pipeline_name = 'sleepy_pipeline'
    handle = ExecutionTargetHandle.for_pipeline_module(
        'dagster_examples.toys.sleepy', pipeline_name)
    pipeline = handle.build_pipeline_definition()
    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run_for_pipeline(pipeline=pipeline)
    variables = {
        'executionParams': {
            'environmentConfigData': {},
            'mode': 'default',
            'selector': {
                'name': pipeline_name
            },
            'executionMetadata': {
                'runId': pipeline_run.run_id
            },
        }
    }
    result = execute_execute_plan_mutation_raw(handle,
                                               variables,
                                               instance_ref=instance.get_ref())
    seen_events = set()
    for event in result:
        seen_events.add((event.dagster_event.event_type_value, event.step_key))

    assert seen_events == EXPECTED_EVENTS
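
All of the examples in this listing share one loading pattern: resolve a pipeline by module path and pipeline (or definition-function) name, then build a definition from the resulting handle. Below is a minimal sketch of that pattern, reusing the module and pipeline names from Example #1 and assuming the pre-0.8 dagster API where these names are importable from the top-level dagster package; it is an illustration, not code from the listing.

from dagster import DagsterInstance, ExecutionTargetHandle, execute_pipeline

# Resolve the pipeline by module path and name, as in Example #1.
handle = ExecutionTargetHandle.for_pipeline_module(
    'dagster_examples.toys.sleepy', 'sleepy_pipeline')
pipeline = handle.build_pipeline_definition()

# Execute against an ephemeral local instance; the empty environment_dict
# assumes the pipeline needs no run configuration.
result = execute_pipeline(
    pipeline,
    environment_dict={},
    instance=DagsterInstance.local_temp(),
)
assert result.success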
Example #2
def make_airflow_dag_kubernetized(
    module_name,
    pipeline_name,
    image,
    namespace,
    environment_dict=None,
    mode=None,
    dag_id=None,
    dag_description=None,
    dag_kwargs=None,
    op_kwargs=None,
):
    from .operators.kubernetes_operator import DagsterKubernetesPodOperator

    check.str_param(module_name, 'module_name')

    handle = ExecutionTargetHandle.for_pipeline_module(module_name,
                                                       pipeline_name)

    # See: https://github.com/dagster-io/dagster/issues/1663
    op_kwargs = check.opt_dict_param(op_kwargs, 'op_kwargs', key_type=str)
    op_kwargs['image'] = image
    op_kwargs['namespace'] = namespace

    return _make_airflow_dag(
        handle=handle,
        pipeline_name=pipeline_name,
        environment_dict=environment_dict,
        mode=mode,
        dag_id=dag_id,
        dag_description=dag_description,
        dag_kwargs=dag_kwargs,
        op_kwargs=op_kwargs,
        operator=DagsterKubernetesPodOperator,
    )
Example #3
def test_repo_module_dynamic_load():
    repository = ExecutionTargetHandle.for_pipeline_module(
        module_name='dagster_examples.intro_tutorial.repos',
        fn_name='define_repo_demo_pipeline').build_repository_definition()

    assert isinstance(repository, RepositoryDefinition)
    assert repository.name == EPHEMERAL_NAME
Example #4
def make_airflow_dag_containerized(
    module_name,
    pipeline_name,
    image,
    environment_dict=None,
    mode=None,
    dag_id=None,
    dag_description=None,
    dag_kwargs=None,
    op_kwargs=None,
):
    check.str_param(module_name, 'module_name')

    handle = ExecutionTargetHandle.for_pipeline_module(module_name,
                                                       pipeline_name)

    op_kwargs = check.opt_dict_param(op_kwargs, 'op_kwargs', key_type=str)
    op_kwargs['image'] = image
    return _make_airflow_dag(
        handle=handle,
        pipeline_name=pipeline_name,
        environment_dict=environment_dict,
        mode=mode,
        dag_id=dag_id,
        dag_description=dag_description,
        dag_kwargs=dag_kwargs,
        op_kwargs=op_kwargs,
        operator=DagsterDockerOperator,
    )
Example #5
def test_dask_cluster(dask_address, s3_bucket):
    # https://github.com/dagster-io/dagster/issues/1748
    with pytest.raises(check.CheckError,
                       match='Must use remote DagsterInstance'):

        _result = execute_pipeline(
            ExecutionTargetHandle.for_pipeline_module(
                'dagster_examples.toys.hammer',
                'hammer_pipeline').build_pipeline_definition(),
            environment_dict={
                'storage': {
                    's3': {
                        'config': {
                            's3_bucket': s3_bucket
                        }
                    }
                },
                'execution': {
                    'dask': {
                        'config': {
                            'address': '%s:8786' % dask_address
                        }
                    }
                },
            },
            # needs to become remote to work
            instance=DagsterInstance.local_temp(),
        )
Example #6
class TestExecuteDagContainerizedS3Storage(object):
    pipeline_name = 'demo_pipeline'
    handle = ExecutionTargetHandle.for_pipeline_module(
        'dagster_airflow_tests.test_project.dagster_airflow_demo',
        pipeline_name)
    environment_yaml = [
        script_relative_path('test_project/env.yaml'),
        script_relative_path('test_project/env_s3.yaml'),
    ]
    run_id = str(uuid.uuid4())
    execution_date = datetime.datetime.utcnow()
    image = IMAGE

    # pylint: disable=redefined-outer-name
    def test_execute_dag_containerized(
            self, dagster_airflow_docker_operator_pipeline):
        for result in dagster_airflow_docker_operator_pipeline:
            assert 'data' in result
            assert 'executePlan' in result['data']
            assert '__typename' in result['data']['executePlan']
            assert result['data']['executePlan'][
                '__typename'] == 'ExecutePlanSuccess'
            # Pull the first ExecutionStepOutputEvent without shadowing the
            # loop variable, and skip input-thunk steps.
            first_output_event = list(
                filter(
                    lambda x: x['__typename'] == 'ExecutionStepOutputEvent',
                    result['data']['executePlan']['stepEvents'],
                ))[0]
            if first_output_event['step']['kind'] == 'INPUT_THUNK':
                continue
Example #7
def test_error_dag_k8s():
    pipeline_name = 'demo_error_pipeline'
    handle = ExecutionTargetHandle.for_pipeline_module(
        'dagster_airflow_tests.test_project.dagster_airflow_demo',
        pipeline_name)
    environment_yaml = [
        script_relative_path('test_project/env_s3.yaml'),
    ]
    environment_dict = load_yaml_from_glob_list(environment_yaml)

    run_id = str(uuid.uuid4())
    execution_date = timezone.utcnow()

    dag, tasks = make_airflow_dag_kubernetized_for_handle(
        handle=handle,
        pipeline_name=pipeline_name,
        image=IMAGE,
        namespace='default',
        environment_dict=environment_dict,
    )

    with pytest.raises(AirflowException) as exc_info:
        execute_tasks_in_dag(dag, tasks, run_id, execution_date)

    assert 'Exception: Unusual error' in str(exc_info.value)
Example #8
def test_error_dag_k8s(dagster_docker_image, cluster_provider):
    print('--- :airflow: test_kubernetes.test_error_dag_k8s')
    _check_aws_creds_available()

    pipeline_name = 'demo_error_pipeline'
    handle = ExecutionTargetHandle.for_pipeline_module('test_pipelines.repo',
                                                       pipeline_name)
    environments_path = test_project_environments_path()
    environment_yaml = [
        os.path.join(environments_path, 'env_s3.yaml'),
    ]
    environment_dict = load_yaml_from_glob_list(environment_yaml)

    run_id = make_new_run_id()
    execution_date = timezone.utcnow()

    dag, tasks = make_airflow_dag_kubernetized_for_handle(
        handle=handle,
        pipeline_name=pipeline_name,
        image=dagster_docker_image,
        namespace='default',
        environment_dict=environment_dict,
        op_kwargs={
            'config_file': os.environ['KUBECONFIG'],
            'env_vars': {
                'AWS_ACCESS_KEY_ID': os.environ['AWS_ACCESS_KEY_ID'],
                'AWS_SECRET_ACCESS_KEY': os.environ['AWS_SECRET_ACCESS_KEY'],
            },
        },
    )

    with pytest.raises(AirflowException) as exc_info:
        execute_tasks_in_dag(dag, tasks, run_id, execution_date)

    assert 'Exception: Unusual error' in str(exc_info.value)
Example #9
def test_s3_storage(dagster_airflow_k8s_operator_pipeline,
                    dagster_docker_image, cluster_provider):
    print('--- :airflow: test_kubernetes.test_s3_storage')
    _check_aws_creds_available()
    environments_path = test_project_environments_path()

    pipeline_name = 'demo_pipeline'
    results = dagster_airflow_k8s_operator_pipeline(
        pipeline_name=pipeline_name,
        handle=ExecutionTargetHandle.for_pipeline_module(
            'test_pipelines.repo', pipeline_name),
        environment_yaml=[
            os.path.join(environments_path, 'env.yaml'),
            os.path.join(environments_path, 'env_s3.yaml'),
        ],
        image=dagster_docker_image,
        op_kwargs={
            'config_file': os.environ['KUBECONFIG'],
            'env_vars': {
                'AWS_ACCESS_KEY_ID': os.environ['AWS_ACCESS_KEY_ID'],
                'AWS_SECRET_ACCESS_KEY': os.environ['AWS_SECRET_ACCESS_KEY'],
            },
        },
    )
    validate_pipeline_execution(results)
Example #10
def make_airflow_dag(
    module_name,
    pipeline_name,
    environment_dict=None,
    mode=None,
    instance=None,
    dag_id=None,
    dag_description=None,
    dag_kwargs=None,
    op_kwargs=None,
):
    check.str_param(module_name, 'module_name')

    handle = ExecutionTargetHandle.for_pipeline_module(module_name,
                                                       pipeline_name)

    return _make_airflow_dag(
        handle=handle,
        pipeline_name=pipeline_name,
        environment_dict=environment_dict,
        mode=mode,
        instance=instance,
        dag_id=dag_id,
        dag_description=dag_description,
        dag_kwargs=dag_kwargs,
        op_kwargs=op_kwargs,
    )
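
This factory is intended to be called from an Airflow DAG definition file so that Airflow discovers the returned DAG at module scope. Here is a minimal sketch of such a call site, assuming make_airflow_dag is exported from the dagster_airflow package and using hypothetical module and pipeline names; it is not taken from the listing above.

import datetime

from dagster_airflow import make_airflow_dag  # assumed export location

# 'my_project.pipelines' and 'my_pipeline' are hypothetical placeholders.
dag, tasks = make_airflow_dag(
    module_name='my_project.pipelines',
    pipeline_name='my_pipeline',
    environment_dict={'storage': {'filesystem': {}}},
    dag_kwargs={
        'default_args': {'start_date': datetime.datetime(2019, 6, 1)},
        'schedule_interval': None,
    },
)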
Example #11
class TestExecuteSkipsContainerized(object):
    pipeline_name = 'optional_outputs'
    handle = ExecutionTargetHandle.for_pipeline_module(
        'dagster_airflow_tests.test_project.dagster_airflow_demo',
        pipeline_name)
    environment_yaml = [
        script_relative_path('test_project/env_filesystem.yaml')
    ]
    run_id = str(uuid.uuid4())
    execution_date = datetime.datetime.utcnow()
    op_kwargs = {'host_tmp_dir': '/tmp'}
    image = IMAGE

    # pylint: disable=redefined-outer-name
    def test_execute_dag_containerized(
            self, dagster_airflow_docker_operator_pipeline):
        expected_airflow_task_states = {
            ('foo', None),
            ('first_consumer', None),
            ('second_consumer', 'skipped'),
            ('third_consumer', 'skipped'),
        }

        seen = {(ti.task_id, ti.current_state())
                for ti in dagster_airflow_docker_operator_pipeline.keys()}
        assert seen == expected_airflow_task_states
Example #12
def test_error_dag_k8s(
    dagster_docker_image, environments_path
):  # pylint: disable=redefined-outer-name
    pipeline_name = 'demo_error_pipeline'
    handle = ExecutionTargetHandle.for_pipeline_module('test_pipelines', pipeline_name)
    environment_yaml = [
        os.path.join(environments_path, 'env_s3.yaml'),
    ]
    environment_dict = load_yaml_from_glob_list(environment_yaml)

    run_id = make_new_run_id()
    execution_date = timezone.utcnow()

    dag, tasks = make_airflow_dag_kubernetized_for_handle(
        handle=handle,
        pipeline_name=pipeline_name,
        image=dagster_docker_image,
        namespace='default',
        environment_dict=environment_dict,
    )

    with pytest.raises(AirflowException) as exc_info:
        execute_tasks_in_dag(dag, tasks, run_id, execution_date)

    assert 'Exception: Unusual error' in str(exc_info.value)
Example #13
def test_my_custom_operator(
    dagster_airflow_custom_operator_pipeline, caplog,
):  # pylint: disable=redefined-outer-name
    caplog.set_level(logging.INFO, logger='CustomOperatorLogger')
    pipeline_name = 'demo_pipeline'
    operator = CustomOperator

    environments_path = test_project_environments_path()

    results = dagster_airflow_custom_operator_pipeline(
        pipeline_name=pipeline_name,
        handle=ExecutionTargetHandle.for_pipeline_module('test_pipelines.repo', pipeline_name),
        operator=operator,
        environment_yaml=[
            os.path.join(environments_path, 'env.yaml'),
            os.path.join(environments_path, 'env_filesystem_no_explicit_base_dir.yaml'),
        ],
    )
    validate_pipeline_execution(results)

    log_lines = 0
    for record in caplog.records:
        if record.name == 'CustomOperatorLogger':
            log_lines += 1
            assert record.message == 'CustomOperator is called'

    assert log_lines == 2
Example #14
def test_repo_module_dynamic_load():
    handle = ExecutionTargetHandle.for_pipeline_module(
        module_name='dagster_examples.intro_tutorial.repos',
        fn_name='hello_cereal_pipeline')
    repository = handle.build_repository_definition()

    assert isinstance(repository, RepositoryDefinition)
    assert repository.name == EPHEMERAL_NAME
    assert ExecutionTargetHandle.get_handle(repository) == (handle, None)
Example #15
def test_repo_module_dynamic_load_from_pipeline():
    repository = ExecutionTargetHandle.for_pipeline_module(
        module_name='dagster_examples.intro_tutorial.repos',
        fn_name='define_repo_demo_pipeline').build_repository_definition()

    assert isinstance(repository, RepositoryDefinition)
    assert repository.name == '<<unnamed>>'
    assert repository.get_pipeline(
        'repo_demo_pipeline').name == 'repo_demo_pipeline'
Example #16
def test_skip_operator(
    dagster_airflow_python_operator_pipeline, environments_path,
):  # pylint: disable=redefined-outer-name
    pipeline_name = 'optional_outputs'
    results = dagster_airflow_python_operator_pipeline(
        pipeline_name=pipeline_name,
        handle=ExecutionTargetHandle.for_pipeline_module('test_pipelines', pipeline_name),
        environment_yaml=[os.path.join(environments_path, 'env_filesystem.yaml')],
    )
    validate_skip_pipeline_execution(results)
Example #17
class TestExecuteSkipsPythonOperator(object):
    pipeline_name = 'optional_outputs'
    handle = ExecutionTargetHandle.for_pipeline_module(
        'dagster_airflow_tests.test_project.dagster_airflow_demo', pipeline_name
    )
    environment_yaml = [script_relative_path('test_project/env_filesystem.yaml')]
    run_id = str(uuid.uuid4())

    # pylint: disable=redefined-outer-name
    def test_execute_dag(self, dagster_airflow_python_operator_pipeline):
        validate_skip_pipeline_execution(dagster_airflow_python_operator_pipeline)
Example #18
def test_repo_module_dynamic_load_from_pipeline():
    handle = ExecutionTargetHandle.for_pipeline_module(
        module_name='dagster_examples.intro_tutorial.repos', fn_name='hello_cereal_pipeline'
    )
    handle = ExecutionTargetHandle.from_dict(handle.to_dict())
    repository = handle.build_repository_definition()

    assert isinstance(repository, RepositoryDefinition)
    assert repository.name == '<<unnamed>>'
    assert repository.get_pipeline('hello_cereal_pipeline').name == 'hello_cereal_pipeline'
    assert ExecutionTargetHandle.get_handle(repository) == (handle, None)
Example #19
def test_dask_cluster(dask_address):
    result = execute_pipeline(
        ExecutionTargetHandle.for_pipeline_module(
            'dagster_examples.toys.hammer', 'hammer_pipeline'
        ).build_pipeline_definition(),
        environment_dict={
            'storage': {'s3': {'config': {'s3_bucket': 'dagster-airflow-scratch'}}},
            'execution': {'dask': {'config': {'address': '%s:8786' % dask_address}}},
        },
    )
    assert result.success
    assert result.result_for_solid('reducer').output_value() == 4
Example #20
def test_gcs_storage(
    dagster_airflow_python_operator_pipeline, environments_path,
):  # pylint: disable=redefined-outer-name
    pipeline_name = 'demo_pipeline_gcs'
    results = dagster_airflow_python_operator_pipeline(
        pipeline_name=pipeline_name,
        handle=ExecutionTargetHandle.for_pipeline_module('test_pipelines', pipeline_name),
        environment_yaml=[
            os.path.join(environments_path, 'env.yaml'),
            os.path.join(environments_path, 'env_gcs.yaml'),
        ],
    )
    validate_pipeline_execution(results)
Example #21
class TestExecuteSkipsContainerized(object):
    pipeline_name = 'optional_outputs'
    handle = ExecutionTargetHandle.for_pipeline_module(
        'dagster_airflow_tests.test_project.dagster_airflow_demo', pipeline_name
    )
    environment_yaml = [script_relative_path('test_project/env_filesystem.yaml')]
    run_id = str(uuid.uuid4())
    op_kwargs = {'host_tmp_dir': '/tmp'}
    image = IMAGE

    # pylint: disable=redefined-outer-name
    def test_execute_dag_containerized(self, dagster_airflow_docker_operator_pipeline):
        validate_skip_pipeline_execution(dagster_airflow_docker_operator_pipeline)
Example #22
class TestExecuteDagPythonS3Storage(object):
    pipeline_name = 'demo_pipeline'
    handle = ExecutionTargetHandle.for_pipeline_module(
        'dagster_airflow_tests.test_project.dagster_airflow_demo',
        pipeline_name)
    environment_yaml = [
        script_relative_path('test_project/env.yaml'),
        script_relative_path('test_project/env_s3.yaml'),
    ]
    run_id = str(uuid.uuid4())

    # pylint: disable=redefined-outer-name
    def test_execute_dag(self, dagster_airflow_python_operator_pipeline):
        validate_pipeline_execution(dagster_airflow_python_operator_pipeline)
Example #23
class TestAirflowizedEventPipeline(object):
    config_yaml = [
        script_relative_path(
            '../../dagster_examples/airline_demo/environments/default.yaml')
    ]

    pipeline_name = 'event_ingest_pipeline'
    handle = ExecutionTargetHandle.for_pipeline_module(
        'dagster_examples.event_pipeline_demo', pipeline_name)

    # pylint: disable=redefined-outer-name
    def test_airflowized_event_pipeline(
            self, dagster_airflow_python_operator_pipeline):
        pass
Example #24
def test_skip_operator(dagster_airflow_docker_operator_pipeline,
                       dagster_docker_image):  # pylint: disable=redefined-outer-name
    pipeline_name = 'optional_outputs'
    environments_path = test_project_environments_path()
    results = dagster_airflow_docker_operator_pipeline(
        pipeline_name=pipeline_name,
        handle=ExecutionTargetHandle.for_pipeline_module(
            'test_pipelines.repo', pipeline_name),
        environment_yaml=[
            os.path.join(environments_path, 'env_filesystem.yaml')
        ],
        op_kwargs={'host_tmp_dir': '/tmp'},
        image=dagster_docker_image,
    )
    validate_skip_pipeline_execution(results)
Example #25
def test_s3_storage(dagster_airflow_docker_operator_pipeline,
                    dagster_docker_image):  # pylint: disable=redefined-outer-name
    pipeline_name = 'demo_pipeline'
    environments_path = test_project_environments_path()
    results = dagster_airflow_docker_operator_pipeline(
        pipeline_name=pipeline_name,
        handle=ExecutionTargetHandle.for_pipeline_module(
            'test_pipelines.repo', pipeline_name),
        environment_yaml=[
            os.path.join(environments_path, 'env.yaml'),
            os.path.join(environments_path, 'env_s3.yaml'),
        ],
        image=dagster_docker_image,
    )
    validate_pipeline_execution(results)
Example #26
def test_fs_storage_no_explicit_base_dir(
    dagster_airflow_python_operator_pipeline,
):  # pylint: disable=redefined-outer-name
    pipeline_name = 'demo_pipeline'
    environments_path = test_project_environments_path()
    results = dagster_airflow_python_operator_pipeline(
        pipeline_name=pipeline_name,
        handle=ExecutionTargetHandle.for_pipeline_module(
            'test_pipelines.repo', pipeline_name),
        environment_yaml=[
            os.path.join(environments_path, 'env.yaml'),
            os.path.join(environments_path,
                         'env_filesystem_no_explicit_base_dir.yaml'),
        ],
    )
    validate_pipeline_execution(results)
Example #27
def test_error_dag_python(environments_path):  # pylint: disable=redefined-outer-name
    pipeline_name = 'demo_error_pipeline'
    handle = ExecutionTargetHandle.for_pipeline_module('test_pipelines', pipeline_name)
    environment_yaml = [
        os.path.join(environments_path, 'env_filesystem.yaml'),
    ]
    environment_dict = load_yaml_from_glob_list(environment_yaml)
    execution_date = timezone.utcnow()

    dag, tasks = make_airflow_dag_for_handle(handle, pipeline_name, environment_dict)

    with pytest.raises(AirflowException) as exc_info:
        execute_tasks_in_dag(dag, tasks, run_id=str(uuid.uuid4()), execution_date=execution_date)

    assert 'Exception: Unusual error' in str(exc_info.value)
Example #28
def test_dask_cluster():
    result = execute_on_dask(
        ExecutionTargetHandle.for_pipeline_module(
            'dagster_examples.toys.hammer', 'define_hammer_pipeline'),
        env_config={
            'storage': {
                's3': {
                    's3_bucket': 'dagster-airflow-scratch'
                }
            }
        },
        run_config=RunConfig(storage_mode=RunStorageMode.S3),
        dask_config=DaskConfig(address='%s:8786' % os.getenv('DASK_ADDRESS')),
    )
    assert result.success
    assert result.result_for_solid('total').transformed_value() == 4
Example #29
class TestExecuteDagKubernetizedGCSStorage(object):
    pipeline_name = 'demo_pipeline_gcs'
    handle = ExecutionTargetHandle.for_pipeline_module(
        'dagster_airflow_tests.test_project.dagster_airflow_demo',
        pipeline_name)
    environment_yaml = [
        script_relative_path('test_project/env.yaml'),
        script_relative_path('test_project/env_gcs.yaml'),
    ]
    run_id = str(uuid.uuid4())
    image = IMAGE

    # pylint: disable=redefined-outer-name
    def test_execute_dag_kubernetized(self,
                                      dagster_airflow_k8s_operator_pipeline):
        validate_pipeline_execution(dagster_airflow_k8s_operator_pipeline)
Example #30
class TestExecuteDagContainerizedFilesystemStorageNoExplicitBaseDir(object):
    pipeline_name = 'demo_pipeline'
    handle = ExecutionTargetHandle.for_pipeline_module(
        'dagster_airflow_tests.test_project.dagster_airflow_demo',
        pipeline_name)
    environment_yaml = [
        script_relative_path('test_project/env.yaml'),
        script_relative_path(
            'test_project/env_filesystem_no_explicit_base_dir.yaml'),
    ]
    run_id = str(uuid.uuid4())
    image = IMAGE

    # pylint: disable=redefined-outer-name
    def test_execute_dag_containerized(
            self, dagster_airflow_docker_operator_pipeline):
        validate_pipeline_execution(dagster_airflow_docker_operator_pipeline)