def test_execute_execute_plan_mutation_raw():
    pipeline_name = 'sleepy_pipeline'
    handle = ExecutionTargetHandle.for_pipeline_module(
        'dagster_examples.toys.sleepy', pipeline_name
    )
    pipeline = handle.build_pipeline_definition()

    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run_for_pipeline(pipeline=pipeline)
    variables = {
        'executionParams': {
            'environmentConfigData': {},
            'mode': 'default',
            'selector': {'name': pipeline_name},
            'executionMetadata': {'runId': pipeline_run.run_id},
        }
    }
    result = execute_execute_plan_mutation_raw(
        handle, variables, instance_ref=instance.get_ref()
    )
    seen_events = set()
    for event in result:
        seen_events.add((event.dagster_event.event_type_value, event.step_key))

    assert seen_events == EXPECTED_EVENTS

def make_airflow_dag_kubernetized(
    module_name,
    pipeline_name,
    image,
    namespace,
    environment_dict=None,
    mode=None,
    dag_id=None,
    dag_description=None,
    dag_kwargs=None,
    op_kwargs=None,
):
    from .operators.kubernetes_operator import DagsterKubernetesPodOperator

    check.str_param(module_name, 'module_name')

    handle = ExecutionTargetHandle.for_pipeline_module(module_name, pipeline_name)

    # See: https://github.com/dagster-io/dagster/issues/1663
    op_kwargs = check.opt_dict_param(op_kwargs, 'op_kwargs', key_type=str)
    op_kwargs['image'] = image
    op_kwargs['namespace'] = namespace

    return _make_airflow_dag(
        handle=handle,
        pipeline_name=pipeline_name,
        environment_dict=environment_dict,
        mode=mode,
        dag_id=dag_id,
        dag_description=dag_description,
        dag_kwargs=dag_kwargs,
        op_kwargs=op_kwargs,
        operator=DagsterKubernetesPodOperator,
    )

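# A minimal usage sketch for make_airflow_dag_kubernetized, assuming the factory
# above is importable from dagster_airflow.factory (where these factory functions
# live in this snapshot of the codebase). The module path, pipeline name, image,
# and bucket below are hypothetical placeholders, not values from this section;
# the (dag, tasks) unpacking mirrors how the kubernetized tests here consume the
# analogous *_for_handle helper's return value.
from dagster_airflow.factory import make_airflow_dag_kubernetized

dag, tasks = make_airflow_dag_kubernetized(
    module_name='my_project.pipelines',  # hypothetical module defining the pipeline
    pipeline_name='my_pipeline',  # hypothetical pipeline name
    image='my-registry/dagster-k8s-demo:latest',  # hypothetical Docker image
    namespace='default',
    environment_dict={'storage': {'s3': {'config': {'s3_bucket': 'my-bucket'}}}},
)
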
def test_repo_module_dynamic_load():
    repository = ExecutionTargetHandle.for_pipeline_module(
        module_name='dagster_examples.intro_tutorial.repos',
        fn_name='define_repo_demo_pipeline',
    ).build_repository_definition()

    assert isinstance(repository, RepositoryDefinition)
    assert repository.name == EPHEMERAL_NAME

def make_airflow_dag_containerized(
    module_name,
    pipeline_name,
    image,
    environment_dict=None,
    mode=None,
    dag_id=None,
    dag_description=None,
    dag_kwargs=None,
    op_kwargs=None,
):
    check.str_param(module_name, 'module_name')

    handle = ExecutionTargetHandle.for_pipeline_module(module_name, pipeline_name)

    op_kwargs = check.opt_dict_param(op_kwargs, 'op_kwargs', key_type=str)
    op_kwargs['image'] = image

    return _make_airflow_dag(
        handle=handle,
        pipeline_name=pipeline_name,
        environment_dict=environment_dict,
        mode=mode,
        dag_id=dag_id,
        dag_description=dag_description,
        dag_kwargs=dag_kwargs,
        op_kwargs=op_kwargs,
        operator=DagsterDockerOperator,
    )

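# A sketch of calling make_airflow_dag_containerized with operator kwargs, under
# the same import-path assumption as above and with hypothetical names throughout.
# host_tmp_dir is forwarded to DagsterDockerOperator via op_kwargs, following the
# pattern the containerized skip tests in this section use for filesystem storage.
from dagster_airflow.factory import make_airflow_dag_containerized

dag, tasks = make_airflow_dag_containerized(
    module_name='my_project.pipelines',  # hypothetical module
    pipeline_name='my_pipeline',  # hypothetical pipeline
    image='my-registry/dagster-airflow-demo:latest',  # hypothetical Docker image
    environment_dict={'storage': {'filesystem': {}}},
    op_kwargs={'host_tmp_dir': '/tmp'},  # temp dir shared with the container, as in the skip tests
)
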
def test_dask_cluster(dask_address, s3_bucket):
    # https://github.com/dagster-io/dagster/issues/1748
    with pytest.raises(check.CheckError, match='Must use remote DagsterInstance'):
        _result = execute_pipeline(
            ExecutionTargetHandle.for_pipeline_module(
                'dagster_examples.toys.hammer', 'hammer_pipeline'
            ).build_pipeline_definition(),
            environment_dict={
                'storage': {'s3': {'config': {'s3_bucket': s3_bucket}}},
                'execution': {'dask': {'config': {'address': '%s:8786' % dask_address}}},
            },
            # needs to become remote to work
            instance=DagsterInstance.local_temp(),
        )

class TestExecuteDagContainerizedS3Storage(object):
    pipeline_name = 'demo_pipeline'
    handle = ExecutionTargetHandle.for_pipeline_module(
        'dagster_airflow_tests.test_project.dagster_airflow_demo', pipeline_name
    )
    environment_yaml = [
        script_relative_path('test_project/env.yaml'),
        script_relative_path('test_project/env_s3.yaml'),
    ]
    run_id = str(uuid.uuid4())
    execution_date = datetime.datetime.utcnow()
    image = IMAGE

    # pylint: disable=redefined-outer-name
    def test_execute_dag_containerized(self, dagster_airflow_docker_operator_pipeline):
        for result in dagster_airflow_docker_operator_pipeline:
            assert 'data' in result
            assert 'executePlan' in result['data']
            assert '__typename' in result['data']['executePlan']
            assert result['data']['executePlan']['__typename'] == 'ExecutePlanSuccess'

            result = list(
                filter(
                    lambda x: x['__typename'] == 'ExecutionStepOutputEvent',
                    result['data']['executePlan']['stepEvents'],
                )
            )[0]
            if result['step']['kind'] == 'INPUT_THUNK':
                continue

def test_error_dag_k8s():
    pipeline_name = 'demo_error_pipeline'
    handle = ExecutionTargetHandle.for_pipeline_module(
        'dagster_airflow_tests.test_project.dagster_airflow_demo', pipeline_name
    )
    environment_yaml = [
        script_relative_path('test_project/env_s3.yaml'),
    ]
    environment_dict = load_yaml_from_glob_list(environment_yaml)

    run_id = str(uuid.uuid4())
    execution_date = timezone.utcnow()

    dag, tasks = make_airflow_dag_kubernetized_for_handle(
        handle=handle,
        pipeline_name=pipeline_name,
        image=IMAGE,
        namespace='default',
        environment_dict=environment_dict,
    )

    with pytest.raises(AirflowException) as exc_info:
        execute_tasks_in_dag(dag, tasks, run_id, execution_date)

    assert 'Exception: Unusual error' in str(exc_info.value)

def test_error_dag_k8s(dagster_docker_image, cluster_provider):
    print('--- :airflow: test_kubernetes.test_error_dag_k8s')
    _check_aws_creds_available()

    pipeline_name = 'demo_error_pipeline'
    handle = ExecutionTargetHandle.for_pipeline_module('test_pipelines.repo', pipeline_name)
    environments_path = test_project_environments_path()
    environment_yaml = [
        os.path.join(environments_path, 'env_s3.yaml'),
    ]
    environment_dict = load_yaml_from_glob_list(environment_yaml)

    run_id = make_new_run_id()
    execution_date = timezone.utcnow()

    dag, tasks = make_airflow_dag_kubernetized_for_handle(
        handle=handle,
        pipeline_name=pipeline_name,
        image=dagster_docker_image,
        namespace='default',
        environment_dict=environment_dict,
        op_kwargs={
            'config_file': os.environ['KUBECONFIG'],
            'env_vars': {
                'AWS_ACCESS_KEY_ID': os.environ['AWS_ACCESS_KEY_ID'],
                'AWS_SECRET_ACCESS_KEY': os.environ['AWS_SECRET_ACCESS_KEY'],
            },
        },
    )

    with pytest.raises(AirflowException) as exc_info:
        execute_tasks_in_dag(dag, tasks, run_id, execution_date)

    assert 'Exception: Unusual error' in str(exc_info.value)

def test_s3_storage(dagster_airflow_k8s_operator_pipeline, dagster_docker_image, cluster_provider):
    print('--- :airflow: test_kubernetes.test_s3_storage')
    _check_aws_creds_available()
    environments_path = test_project_environments_path()

    pipeline_name = 'demo_pipeline'
    results = dagster_airflow_k8s_operator_pipeline(
        pipeline_name=pipeline_name,
        handle=ExecutionTargetHandle.for_pipeline_module('test_pipelines.repo', pipeline_name),
        environment_yaml=[
            os.path.join(environments_path, 'env.yaml'),
            os.path.join(environments_path, 'env_s3.yaml'),
        ],
        image=dagster_docker_image,
        op_kwargs={
            'config_file': os.environ['KUBECONFIG'],
            'env_vars': {
                'AWS_ACCESS_KEY_ID': os.environ['AWS_ACCESS_KEY_ID'],
                'AWS_SECRET_ACCESS_KEY': os.environ['AWS_SECRET_ACCESS_KEY'],
            },
        },
    )
    validate_pipeline_execution(results)

def make_airflow_dag(
    module_name,
    pipeline_name,
    environment_dict=None,
    mode=None,
    instance=None,
    dag_id=None,
    dag_description=None,
    dag_kwargs=None,
    op_kwargs=None,
):
    check.str_param(module_name, 'module_name')

    handle = ExecutionTargetHandle.for_pipeline_module(module_name, pipeline_name)

    return _make_airflow_dag(
        handle=handle,
        pipeline_name=pipeline_name,
        environment_dict=environment_dict,
        mode=mode,
        instance=instance,
        dag_id=dag_id,
        dag_description=dag_description,
        dag_kwargs=dag_kwargs,
        op_kwargs=op_kwargs,
    )

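# A minimal sketch of the plain make_airflow_dag entry point, which the Python
# operator tests in this section exercise: the pipeline's steps run in-process
# via Airflow Python operators rather than in containers or pods. Names are
# hypothetical placeholders; per the signature above, everything except
# module_name and pipeline_name is optional, and dag_kwargs is presumably
# forwarded to the underlying airflow DAG constructor (an assumption here).
import datetime

from dagster_airflow.factory import make_airflow_dag

dag, tasks = make_airflow_dag(
    module_name='my_project.pipelines',  # hypothetical module
    pipeline_name='my_pipeline',  # hypothetical pipeline
    environment_dict={'storage': {'filesystem': {}}},
    dag_kwargs={'default_args': {'start_date': datetime.datetime(2019, 1, 1)}},
)
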
class TestExecuteSkipsContainerized(object):
    pipeline_name = 'optional_outputs'
    handle = ExecutionTargetHandle.for_pipeline_module(
        'dagster_airflow_tests.test_project.dagster_airflow_demo', pipeline_name
    )
    environment_yaml = [script_relative_path('test_project/env_filesystem.yaml')]
    run_id = str(uuid.uuid4())
    execution_date = datetime.datetime.utcnow()
    op_kwargs = {'host_tmp_dir': '/tmp'}
    image = IMAGE

    # pylint: disable=redefined-outer-name
    def test_execute_dag_containerized(self, dagster_airflow_docker_operator_pipeline):
        expected_airflow_task_states = {
            ('foo', None),
            ('first_consumer', None),
            ('second_consumer', 'skipped'),
            ('third_consumer', 'skipped'),
        }
        seen = {
            (ti.task_id, ti.current_state())
            for ti in dagster_airflow_docker_operator_pipeline.keys()
        }
        assert seen == expected_airflow_task_states

def test_error_dag_k8s(
    dagster_docker_image, environments_path
):  # pylint: disable=redefined-outer-name
    pipeline_name = 'demo_error_pipeline'
    handle = ExecutionTargetHandle.for_pipeline_module('test_pipelines', pipeline_name)
    environment_yaml = [
        os.path.join(environments_path, 'env_s3.yaml'),
    ]
    environment_dict = load_yaml_from_glob_list(environment_yaml)

    run_id = make_new_run_id()
    execution_date = timezone.utcnow()

    dag, tasks = make_airflow_dag_kubernetized_for_handle(
        handle=handle,
        pipeline_name=pipeline_name,
        image=dagster_docker_image,
        namespace='default',
        environment_dict=environment_dict,
    )

    with pytest.raises(AirflowException) as exc_info:
        execute_tasks_in_dag(dag, tasks, run_id, execution_date)

    assert 'Exception: Unusual error' in str(exc_info.value)

def test_my_custom_operator(
    dagster_airflow_custom_operator_pipeline, caplog
):  # pylint: disable=redefined-outer-name
    caplog.set_level(logging.INFO, logger='CustomOperatorLogger')
    pipeline_name = 'demo_pipeline'
    operator = CustomOperator

    environments_path = test_project_environments_path()

    results = dagster_airflow_custom_operator_pipeline(
        pipeline_name=pipeline_name,
        handle=ExecutionTargetHandle.for_pipeline_module('test_pipelines.repo', pipeline_name),
        operator=operator,
        environment_yaml=[
            os.path.join(environments_path, 'env.yaml'),
            os.path.join(environments_path, 'env_filesystem_no_explicit_base_dir.yaml'),
        ],
    )
    validate_pipeline_execution(results)

    log_lines = 0
    for record in caplog.records:
        if record.name == 'CustomOperatorLogger':
            log_lines += 1
            assert record.message == 'CustomOperator is called'

    assert log_lines == 2

def test_repo_module_dynamic_load():
    handle = ExecutionTargetHandle.for_pipeline_module(
        module_name='dagster_examples.intro_tutorial.repos', fn_name='hello_cereal_pipeline'
    )
    repository = handle.build_repository_definition()

    assert isinstance(repository, RepositoryDefinition)
    assert repository.name == EPHEMERAL_NAME
    assert ExecutionTargetHandle.get_handle(repository) == (handle, None)

def test_repo_module_dynamic_load_from_pipeline():
    repository = ExecutionTargetHandle.for_pipeline_module(
        module_name='dagster_examples.intro_tutorial.repos',
        fn_name='define_repo_demo_pipeline',
    ).build_repository_definition()

    assert isinstance(repository, RepositoryDefinition)
    assert repository.name == '<<unnamed>>'
    assert repository.get_pipeline('repo_demo_pipeline').name == 'repo_demo_pipeline'

def test_skip_operator(
    dagster_airflow_python_operator_pipeline, environments_path
):  # pylint: disable=redefined-outer-name
    pipeline_name = 'optional_outputs'
    results = dagster_airflow_python_operator_pipeline(
        pipeline_name=pipeline_name,
        handle=ExecutionTargetHandle.for_pipeline_module('test_pipelines', pipeline_name),
        environment_yaml=[os.path.join(environments_path, 'env_filesystem.yaml')],
    )
    validate_skip_pipeline_execution(results)

class TestExecuteSkipsPythonOperator(object):
    pipeline_name = 'optional_outputs'
    handle = ExecutionTargetHandle.for_pipeline_module(
        'dagster_airflow_tests.test_project.dagster_airflow_demo', pipeline_name
    )
    environment_yaml = [script_relative_path('test_project/env_filesystem.yaml')]
    run_id = str(uuid.uuid4())

    # pylint: disable=redefined-outer-name
    def test_execute_dag(self, dagster_airflow_python_operator_pipeline):
        validate_skip_pipeline_execution(dagster_airflow_python_operator_pipeline)

def test_repo_module_dynamic_load_from_pipeline():
    handle = ExecutionTargetHandle.for_pipeline_module(
        module_name='dagster_examples.intro_tutorial.repos', fn_name='hello_cereal_pipeline'
    )
    handle = ExecutionTargetHandle.from_dict(handle.to_dict())
    repository = handle.build_repository_definition()

    assert isinstance(repository, RepositoryDefinition)
    assert repository.name == '<<unnamed>>'
    assert repository.get_pipeline('hello_cereal_pipeline').name == 'hello_cereal_pipeline'
    assert ExecutionTargetHandle.get_handle(repository) == (handle, None)

def test_dask_cluster(dask_address):
    result = execute_pipeline(
        ExecutionTargetHandle.for_pipeline_module(
            'dagster_examples.toys.hammer', 'hammer_pipeline'
        ).build_pipeline_definition(),
        environment_dict={
            'storage': {'s3': {'config': {'s3_bucket': 'dagster-airflow-scratch'}}},
            'execution': {'dask': {'config': {'address': '%s:8786' % dask_address}}},
        },
    )
    assert result.success
    assert result.result_for_solid('reducer').output_value() == 4

def test_gcs_storage(
    dagster_airflow_python_operator_pipeline, environments_path
):  # pylint: disable=redefined-outer-name
    pipeline_name = 'demo_pipeline_gcs'
    results = dagster_airflow_python_operator_pipeline(
        pipeline_name=pipeline_name,
        handle=ExecutionTargetHandle.for_pipeline_module('test_pipelines', pipeline_name),
        environment_yaml=[
            os.path.join(environments_path, 'env.yaml'),
            os.path.join(environments_path, 'env_gcs.yaml'),
        ],
    )
    validate_pipeline_execution(results)

class TestExecuteSkipsContainerized(object):
    pipeline_name = 'optional_outputs'
    handle = ExecutionTargetHandle.for_pipeline_module(
        'dagster_airflow_tests.test_project.dagster_airflow_demo', pipeline_name
    )
    environment_yaml = [script_relative_path('test_project/env_filesystem.yaml')]
    run_id = str(uuid.uuid4())
    op_kwargs = {'host_tmp_dir': '/tmp'}
    image = IMAGE

    # pylint: disable=redefined-outer-name
    def test_execute_dag_containerized(self, dagster_airflow_docker_operator_pipeline):
        validate_skip_pipeline_execution(dagster_airflow_docker_operator_pipeline)

class TestExecuteDagPythonS3Storage(object):
    pipeline_name = 'demo_pipeline'
    handle = ExecutionTargetHandle.for_pipeline_module(
        'dagster_airflow_tests.test_project.dagster_airflow_demo', pipeline_name
    )
    environment_yaml = [
        script_relative_path('test_project/env.yaml'),
        script_relative_path('test_project/env_s3.yaml'),
    ]
    run_id = str(uuid.uuid4())

    # pylint: disable=redefined-outer-name
    def test_execute_dag(self, dagster_airflow_python_operator_pipeline):
        validate_pipeline_execution(dagster_airflow_python_operator_pipeline)

class TestAirflowizedEventPipeline(object):
    config_yaml = [
        script_relative_path('../../dagster_examples/airline_demo/environments/default.yaml')
    ]
    pipeline_name = 'event_ingest_pipeline'
    handle = ExecutionTargetHandle.for_pipeline_module(
        'dagster_examples.event_pipeline_demo', pipeline_name
    )

    # pylint: disable=redefined-outer-name
    def test_airflowized_event_pipeline(self, dagster_airflow_python_operator_pipeline):
        pass

def test_skip_operator(
    dagster_airflow_docker_operator_pipeline, dagster_docker_image
):  # pylint: disable=redefined-outer-name
    pipeline_name = 'optional_outputs'
    environments_path = test_project_environments_path()
    results = dagster_airflow_docker_operator_pipeline(
        pipeline_name=pipeline_name,
        handle=ExecutionTargetHandle.for_pipeline_module('test_pipelines.repo', pipeline_name),
        environment_yaml=[os.path.join(environments_path, 'env_filesystem.yaml')],
        op_kwargs={'host_tmp_dir': '/tmp'},
        image=dagster_docker_image,
    )
    validate_skip_pipeline_execution(results)

def test_s3_storage(
    dagster_airflow_docker_operator_pipeline, dagster_docker_image
):  # pylint: disable=redefined-outer-name
    pipeline_name = 'demo_pipeline'
    environments_path = test_project_environments_path()
    results = dagster_airflow_docker_operator_pipeline(
        pipeline_name=pipeline_name,
        handle=ExecutionTargetHandle.for_pipeline_module('test_pipelines.repo', pipeline_name),
        environment_yaml=[
            os.path.join(environments_path, 'env.yaml'),
            os.path.join(environments_path, 'env_s3.yaml'),
        ],
        image=dagster_docker_image,
    )
    validate_pipeline_execution(results)

def test_fs_storage_no_explicit_base_dir(
    dagster_airflow_python_operator_pipeline,
):  # pylint: disable=redefined-outer-name
    pipeline_name = 'demo_pipeline'
    environments_path = test_project_environments_path()
    results = dagster_airflow_python_operator_pipeline(
        pipeline_name=pipeline_name,
        handle=ExecutionTargetHandle.for_pipeline_module('test_pipelines.repo', pipeline_name),
        environment_yaml=[
            os.path.join(environments_path, 'env.yaml'),
            os.path.join(environments_path, 'env_filesystem_no_explicit_base_dir.yaml'),
        ],
    )
    validate_pipeline_execution(results)

def test_error_dag_python(environments_path):  # pylint: disable=redefined-outer-name
    pipeline_name = 'demo_error_pipeline'
    handle = ExecutionTargetHandle.for_pipeline_module('test_pipelines', pipeline_name)
    environment_yaml = [
        os.path.join(environments_path, 'env_filesystem.yaml'),
    ]
    environment_dict = load_yaml_from_glob_list(environment_yaml)
    execution_date = timezone.utcnow()

    dag, tasks = make_airflow_dag_for_handle(handle, pipeline_name, environment_dict)

    with pytest.raises(AirflowException) as exc_info:
        execute_tasks_in_dag(dag, tasks, run_id=str(uuid.uuid4()), execution_date=execution_date)

    assert 'Exception: Unusual error' in str(exc_info.value)

def test_dask_cluster():
    result = execute_on_dask(
        ExecutionTargetHandle.for_pipeline_module(
            'dagster_examples.toys.hammer', 'define_hammer_pipeline'
        ),
        env_config={'storage': {'s3': {'s3_bucket': 'dagster-airflow-scratch'}}},
        run_config=RunConfig(storage_mode=RunStorageMode.S3),
        dask_config=DaskConfig(address='%s:8786' % os.getenv('DASK_ADDRESS')),
    )
    assert result.success
    assert result.result_for_solid('total').transformed_value() == 4

class TestExecuteDagKubernetizedGCSStorage(object):
    pipeline_name = 'demo_pipeline_gcs'
    handle = ExecutionTargetHandle.for_pipeline_module(
        'dagster_airflow_tests.test_project.dagster_airflow_demo', pipeline_name
    )
    environment_yaml = [
        script_relative_path('test_project/env.yaml'),
        script_relative_path('test_project/env_gcs.yaml'),
    ]
    run_id = str(uuid.uuid4())
    image = IMAGE

    # pylint: disable=redefined-outer-name
    def test_execute_dag_kubernetized(self, dagster_airflow_k8s_operator_pipeline):
        validate_pipeline_execution(dagster_airflow_k8s_operator_pipeline)

class TestExecuteDagContainerizedFilesystemStorageNoExplicitBaseDir(object):
    pipeline_name = 'demo_pipeline'
    handle = ExecutionTargetHandle.for_pipeline_module(
        'dagster_airflow_tests.test_project.dagster_airflow_demo', pipeline_name
    )
    environment_yaml = [
        script_relative_path('test_project/env.yaml'),
        script_relative_path('test_project/env_filesystem_no_explicit_base_dir.yaml'),
    ]
    run_id = str(uuid.uuid4())
    image = IMAGE

    # pylint: disable=redefined-outer-name
    def test_execute_dag_containerized(self, dagster_airflow_docker_operator_pipeline):
        validate_pipeline_execution(dagster_airflow_docker_operator_pipeline)