def test_error_dag_containerized(dagster_docker_image):  # pylint: disable=redefined-outer-name
    pipeline_name = "demo_error_pipeline"
    recon_repo = ReconstructableRepository.for_module(
        "dagster_test.test_project.test_pipelines.repo", "define_demo_execution_repo"
    )
    environments_path = test_project_environments_path()
    environment_yaml = [
        os.path.join(environments_path, "env_s3.yaml"),
    ]
    run_config = load_yaml_from_glob_list(environment_yaml)
    run_id = make_new_run_id()
    execution_date = timezone.utcnow()

    with postgres_instance() as instance:
        dag, tasks = make_airflow_dag_containerized_for_recon_repo(
            recon_repo,
            pipeline_name,
            dagster_docker_image,
            run_config,
            instance=instance,
            op_kwargs={"network_mode": "container:test-postgres-db-airflow"},
        )

        with pytest.raises(AirflowException) as exc_info:
            execute_tasks_in_dag(dag, tasks, run_id, execution_date)

        assert "Exception: Unusual error" in str(exc_info.value)

def test_s3_storage(dagster_airflow_k8s_operator_pipeline, dagster_docker_image, cluster_provider):
    print('--- :airflow: test_kubernetes.test_s3_storage')
    _check_aws_creds_available()
    environments_path = test_project_environments_path()

    pipeline_name = 'demo_pipeline'
    results = dagster_airflow_k8s_operator_pipeline(
        pipeline_name=pipeline_name,
        recon_repo=ReconstructableRepository.for_module(
            'dagster_test.test_project.test_pipelines.repo', 'define_demo_execution_repo',
        ),
        environment_yaml=[
            os.path.join(environments_path, 'env.yaml'),
            os.path.join(environments_path, 'env_s3.yaml'),
        ],
        image=dagster_docker_image,
        op_kwargs={
            'config_file': os.environ['KUBECONFIG'],
            'env_vars': {
                'AWS_ACCESS_KEY_ID': os.environ['AWS_ACCESS_KEY_ID'],
                'AWS_SECRET_ACCESS_KEY': os.environ['AWS_SECRET_ACCESS_KEY'],
            },
        },
    )
    validate_pipeline_execution(results)

def test_airflow_execution_date_tags_job():
    job_name = "demo_airflow_execution_date_job"
    recon_repo = ReconstructableRepository.for_module(
        "dagster_test.test_project.test_pipelines.repo", job_name
    )
    environments_path = get_test_project_environments_path()
    environment_yaml = [
        os.path.join(environments_path, "env_filesystem.yaml"),
    ]
    run_config = load_yaml_from_glob_list(environment_yaml)

    execution_date = timezone.utcnow()

    dag, tasks = make_airflow_dag_for_recon_repo(recon_repo, job_name, run_config)

    results = execute_tasks_in_dag(
        dag, tasks, run_id=make_new_run_id(), execution_date=execution_date
    )

    materialized_airflow_execution_date = None
    for result in results.values():
        for event in result:
            if event.event_type_value == "ASSET_MATERIALIZATION":
                materialization = event.event_specific_data.materialization
                materialization_entry = materialization.metadata_entries[0]
                materialized_airflow_execution_date = materialization_entry.entry_data.text

    assert execution_date.isoformat() == materialized_airflow_execution_date

def test_origin_ids_stable(monkeypatch):
    # This test asserts fixed schedule origin IDs to prevent any changes from
    # accidentally shifting these IDs, which are persisted to ScheduleStorage.

    # Pin the executable path so the hashes are stable across environments.
    monkeypatch.setattr(sys, "executable", "/fake/python")

    file_repo = ReconstructableRepository.for_file(
        "/path/to/file", "the_repo", "/path/to/working_dir"
    )

    # Ensure the monkeypatch worked.
    assert file_repo.get_origin().executable_path == "/fake/python"

    assert file_repo.get_origin_id() == "3766b1c554fd961b88b9301756250febff3d0ffa"
    schedule = file_repo.get_reconstructable_schedule("simple_schedule")
    assert schedule.get_origin_id() == "7c60d01588673ffcaea16b6fd59d998dc63ed3c3"

    module_repo = ReconstructableRepository.for_module("dummy_module", "the_repo")
    assert module_repo.get_origin_id() == "86503fc349d4ecf44bd22ca1de64c10f8ffcebbd"
    module_schedule = module_repo.get_reconstructable_schedule("simple_schedule")
    assert module_schedule.get_origin_id() == "e4c7131b74ad600969876d8fa461f215ced9631a"

def test_error_dag_k8s(dagster_docker_image, cluster_provider):
    print('--- :airflow: test_kubernetes.test_error_dag_k8s')
    _check_aws_creds_available()

    pipeline_name = 'demo_error_pipeline'
    handle = ReconstructableRepository.for_module('test_pipelines.repo', pipeline_name)
    environments_path = test_project_environments_path()
    environment_yaml = [
        os.path.join(environments_path, 'env_s3.yaml'),
    ]
    environment_dict = load_yaml_from_glob_list(environment_yaml)

    run_id = make_new_run_id()
    execution_date = timezone.utcnow()

    dag, tasks = make_airflow_dag_kubernetized_for_handle(
        handle=handle,
        pipeline_name=pipeline_name,
        image=dagster_docker_image,
        namespace='default',
        environment_dict=environment_dict,
        op_kwargs={
            'config_file': os.environ['KUBECONFIG'],
            'env_vars': {
                'AWS_ACCESS_KEY_ID': os.environ['AWS_ACCESS_KEY_ID'],
                'AWS_SECRET_ACCESS_KEY': os.environ['AWS_SECRET_ACCESS_KEY'],
            },
        },
    )

    with pytest.raises(AirflowException) as exc_info:
        execute_tasks_in_dag(dag, tasks, run_id, execution_date)

    assert 'Exception: Unusual error' in str(exc_info.value)

def test_my_custom_operator(
    dagster_airflow_custom_operator_pipeline, caplog,
):  # pylint: disable=redefined-outer-name
    caplog.set_level(logging.INFO, logger="CustomOperatorLogger")
    pipeline_name = "demo_pipeline_s3"
    operator = CustomOperator

    environments_path = get_test_project_environments_path()

    results = dagster_airflow_custom_operator_pipeline(
        pipeline_name=pipeline_name,
        recon_repo=ReconstructableRepository.for_module(
            "dagster_test.test_project.test_pipelines.repo", pipeline_name
        ),
        operator=operator,
        environment_yaml=[
            os.path.join(environments_path, "env.yaml"),
            os.path.join(environments_path, "env_s3.yaml"),
        ],
    )
    validate_pipeline_execution(results)

    log_lines = 0
    for record in caplog.records:
        if record.name == "CustomOperatorLogger":
            log_lines += 1
            assert record.message == "CustomOperator is called"

    assert log_lines == 2

def test_reconstructable_cli_args():
    recon_file = ReconstructableRepository.for_file('foo_file', 'bar_function')
    assert recon_file.get_cli_args() == '-f {foo_file} -a bar_function'.format(
        foo_file=os.path.abspath(os.path.expanduser('foo_file'))
    )

    recon_module = ReconstructableRepository.for_module('foo_module', 'bar_function')
    assert recon_module.get_cli_args() == '-m foo_module -a bar_function'

def recon_repo_for_cli_args(kwargs):
    """Builds a ReconstructableRepository for CLI arguments, which can be any of the
    combinations for repo loading above.
    """
    check.dict_param(kwargs, "kwargs")
    _cli_load_invariant(kwargs.get("pipeline_name") is None)

    if kwargs.get("workspace"):
        check.not_implemented("Workspace not supported yet in this cli command")
    elif kwargs.get("module_name") and kwargs.get("fn_name"):
        _cli_load_invariant(kwargs.get("repository_yaml") is None)
        _cli_load_invariant(kwargs.get("python_file") is None)
        return ReconstructableRepository.for_module(
            kwargs["module_name"], kwargs["fn_name"], get_working_directory_from_kwargs(kwargs),
        )
    elif kwargs.get("python_file") and kwargs.get("fn_name"):
        _cli_load_invariant(kwargs.get("repository_yaml") is None)
        _cli_load_invariant(kwargs.get("module_name") is None)
        return ReconstructableRepository.for_file(
            os.path.abspath(kwargs["python_file"]),
            kwargs["fn_name"],
            get_working_directory_from_kwargs(kwargs),
        )
    else:
        _cli_load_invariant(False)

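# Usage sketch (illustrative, not from the source): driving recon_repo_for_cli_args
# from parsed CLI options via the module-path branch above. The "my_pkg.repo" module
# and "define_repo" attribute are hypothetical placeholders, and this assumes
# get_working_directory_from_kwargs tolerates a missing working-directory key.
def example_recon_repo_from_module_args():
    return recon_repo_for_cli_args(
        {
            "pipeline_name": None,
            "workspace": None,
            "repository_yaml": None,
            "python_file": None,
            "module_name": "my_pkg.repo",
            "fn_name": "define_repo",
        }
    )
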
def test_airflow_execution_date_tags_containerized(
    dagster_docker_image,
):  # pylint: disable=redefined-outer-name, unused-argument
    pipeline_name = "demo_airflow_execution_date_pipeline"
    recon_repo = ReconstructableRepository.for_module(
        "dagster_test.test_project.test_pipelines.repo", "define_demo_execution_repo"
    )
    environments_path = test_project_environments_path()
    environment_yaml = [
        os.path.join(environments_path, "env_s3.yaml"),
    ]
    run_config = load_yaml_from_glob_list(environment_yaml)

    execution_date = timezone.utcnow()

    dag, tasks = make_airflow_dag_containerized_for_recon_repo(
        recon_repo, pipeline_name, dagster_docker_image, run_config
    )

    results = execute_tasks_in_dag(
        dag, tasks, run_id=make_new_run_id(), execution_date=execution_date
    )

    materialized_airflow_execution_date = None
    for result in results.values():
        for event in result:
            if event.event_type_value == "STEP_MATERIALIZATION":
                materialization = event.event_specific_data.materialization
                materialization_entry = materialization.metadata_entries[0]
                materialized_airflow_execution_date = materialization_entry.entry_data.text

    assert execution_date.isoformat() == materialized_airflow_execution_date

def test_s3_storage(dagster_airflow_k8s_operator_pipeline, dagster_docker_image, cluster_provider):
    _check_aws_creds_available()
    environments_path = test_project_environments_path()

    pipeline_name = "demo_pipeline"
    results = dagster_airflow_k8s_operator_pipeline(
        pipeline_name=pipeline_name,
        recon_repo=ReconstructableRepository.for_module(
            "dagster_test.test_project.test_pipelines.repo", "define_demo_execution_repo",
        ),
        environment_yaml=[
            os.path.join(environments_path, "env.yaml"),
            os.path.join(environments_path, "env_s3.yaml"),
        ],
        image=dagster_docker_image,
        op_kwargs={
            "config_file": os.environ["KUBECONFIG"],
            "env_vars": {
                "AWS_ACCESS_KEY_ID": os.environ["AWS_ACCESS_KEY_ID"],
                "AWS_SECRET_ACCESS_KEY": os.environ["AWS_SECRET_ACCESS_KEY"],
            },
        },
    )
    validate_pipeline_execution(results)

def test_error_dag_k8s(dagster_docker_image, cluster_provider):
    _check_aws_creds_available()

    pipeline_name = "demo_error_pipeline"
    recon_repo = ReconstructableRepository.for_module(
        "dagster_test.test_project.test_pipelines.repo", "define_demo_execution_repo"
    )
    environments_path = test_project_environments_path()
    environment_yaml = [
        os.path.join(environments_path, "env_s3.yaml"),
    ]
    run_config = load_yaml_from_glob_list(environment_yaml)

    run_id = make_new_run_id()
    execution_date = timezone.utcnow()

    dag, tasks = make_airflow_dag_kubernetized_for_recon_repo(
        recon_repo=recon_repo,
        pipeline_name=pipeline_name,
        image=dagster_docker_image,
        namespace="default",
        run_config=run_config,
        op_kwargs={
            "config_file": os.environ["KUBECONFIG"],
            "env_vars": {
                "AWS_ACCESS_KEY_ID": os.environ["AWS_ACCESS_KEY_ID"],
                "AWS_SECRET_ACCESS_KEY": os.environ["AWS_SECRET_ACCESS_KEY"],
            },
        },
    )

    with pytest.raises(AirflowException) as exc_info:
        execute_tasks_in_dag(dag, tasks, run_id, execution_date)

    assert "Exception: Unusual error" in str(exc_info.value)

def test_my_custom_operator(
    dagster_airflow_custom_operator_pipeline, caplog,
):  # pylint: disable=redefined-outer-name
    caplog.set_level(logging.INFO, logger='CustomOperatorLogger')
    pipeline_name = 'demo_pipeline'
    operator = CustomOperator

    environments_path = test_project_environments_path()

    results = dagster_airflow_custom_operator_pipeline(
        pipeline_name=pipeline_name,
        handle=ReconstructableRepository.for_module('test_pipelines.repo', pipeline_name),
        operator=operator,
        environment_yaml=[
            os.path.join(environments_path, 'env.yaml'),
            os.path.join(environments_path, 'env_filesystem_no_explicit_base_dir.yaml'),
        ],
    )
    validate_pipeline_execution(results)

    log_lines = 0
    for record in caplog.records:
        if record.name == 'CustomOperatorLogger':
            log_lines += 1
            assert record.message == 'CustomOperator is called'

    assert log_lines == 2

def recon_repo_for_cli_args(kwargs):
    '''Builds a ReconstructableRepository for CLI arguments, which can be any of the
    combinations for repo loading above.
    '''
    check.dict_param(kwargs, 'kwargs')
    _cli_load_invariant(kwargs.get('pipeline_name') is None)

    if kwargs.get('repository_yaml') or all_none(kwargs):
        _cli_load_invariant(kwargs.get('module_name') is None)
        _cli_load_invariant(kwargs.get('python_file') is None)
        _cli_load_invariant(kwargs.get('fn_name') is None)
        repo_yaml = (
            os.path.abspath(kwargs.get('repository_yaml'))
            if kwargs.get('repository_yaml')
            else DEFAULT_REPOSITORY_YAML_FILENAME
        )
        _cli_load_invariant(
            os.path.exists(repo_yaml),
            'Expected to use file "{}" to load repository but it does not exist. '
            'Verify your current working directory or CLI arguments.'.format(repo_yaml),
        )
        return ReconstructableRepository.from_yaml(repo_yaml)
    elif kwargs.get('module_name') and kwargs.get('fn_name'):
        _cli_load_invariant(kwargs.get('repository_yaml') is None)
        _cli_load_invariant(kwargs.get('python_file') is None)
        return ReconstructableRepository.for_module(kwargs['module_name'], kwargs['fn_name'])
    elif kwargs.get('python_file') and kwargs.get('fn_name'):
        _cli_load_invariant(kwargs.get('repository_yaml') is None)
        _cli_load_invariant(kwargs.get('module_name') is None)
        return ReconstructableRepository.for_file(
            os.path.abspath(kwargs['python_file']), kwargs['fn_name']
        )
    else:
        _cli_load_invariant(False)

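# Usage sketch (illustrative, not from the source): with every loading option left
# as None, this variant falls back to DEFAULT_REPOSITORY_YAML_FILENAME in the
# current working directory; that file must exist or the invariant above fires.
def example_recon_repo_from_default_yaml():
    return recon_repo_for_cli_args(
        {
            'pipeline_name': None,
            'repository_yaml': None,
            'module_name': None,
            'python_file': None,
            'fn_name': None,
        }
    )
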
def test_origin_ids_stable(monkeypatch):
    # This test asserts fixed schedule origin IDs to prevent any changes from
    # accidentally shifting these IDs, which are persisted to ScheduleStorage.

    # Pin the executable path so the hashes are stable across environments.
    monkeypatch.setattr(sys, 'executable', '/fake/python')

    file_repo = ReconstructableRepository.for_file('/path/to/file', 'the_repo')

    # Ensure the monkeypatch worked.
    assert file_repo.get_origin().executable_path == '/fake/python'

    assert file_repo.get_origin_id() == '9ce1e0f6f059725bb6f85deeaea0322734bd77d6'
    schedule = file_repo.get_reconstructable_schedule('simple_schedule')
    assert schedule.get_origin_id() == 'f1a21671fccf4c986d20f40cac0730e47daa0183'

    module_repo = ReconstructableRepository.for_module('dummy_module', 'the_repo')
    assert module_repo.get_origin_id() == '86503fc349d4ecf44bd22ca1de64c10f8ffcebbd'
    module_schedule = module_repo.get_reconstructable_schedule('simple_schedule')
    assert module_schedule.get_origin_id() == 'e4c7131b74ad600969876d8fa461f215ced9631a'

def define_examples_context():
    return DagsterGraphQLContext(
        locations=[
            InProcessRepositoryLocation(
                ReconstructableRepository.for_module(
                    'dagster_examples', 'define_internal_dagit_repository'
                ),
            )
        ],
        instance=DagsterInstance.ephemeral(),
    )

def get_reconstructable_repository_from_origin_kwargs(kwargs):
    if kwargs.get('python_file'):
        _check_cli_arguments_none(kwargs, 'module_name')
        return ReconstructableRepository.for_file(
            kwargs.get('python_file'), kwargs.get('attribute')
        )
    if kwargs.get('module_name'):
        return ReconstructableRepository.for_module(
            kwargs.get('module_name'), kwargs.get('attribute')
        )
    check.failed('invalid')

def test_skip_operator(
    dagster_airflow_python_operator_pipeline,
):  # pylint: disable=redefined-outer-name
    pipeline_name = 'optional_outputs'
    environments_path = test_project_environments_path()
    results = dagster_airflow_python_operator_pipeline(
        pipeline_name=pipeline_name,
        recon_repo=ReconstructableRepository.for_module('test_pipelines.repo', pipeline_name),
        environment_yaml=[os.path.join(environments_path, 'env_filesystem.yaml')],
    )
    validate_skip_pipeline_execution(results)

def define_examples_context():
    return DagsterGraphQLContext(
        environments=[
            InProcessDagsterEnvironment(
                ReconstructableRepository.for_module('dagster_examples', 'define_demo_repo'),
                execution_manager=SynchronousExecutionManager(),
            )
        ],
        instance=DagsterInstance.ephemeral(),
    )

def test_gcs_storage(
    dagster_airflow_python_operator_pipeline,
):  # pylint: disable=redefined-outer-name
    pipeline_name = "demo_pipeline_gcs"
    environments_path = get_test_project_environments_path()
    results = dagster_airflow_python_operator_pipeline(
        pipeline_name=pipeline_name,
        recon_repo=ReconstructableRepository.for_module(
            "dagster_test.test_project.test_pipelines.repo", pipeline_name
        ),
        environment_yaml=[
            os.path.join(environments_path, "env.yaml"),
            os.path.join(environments_path, "env_gcs.yaml"),
        ],
    )
    validate_pipeline_execution(results)

class TestAirflowizedEventPipeline(object):
    config_yaml = [
        script_relative_path('../../dagster_examples/airline_demo/environments/default.yaml')
    ]

    pipeline_name = 'event_ingest_pipeline'
    recon_repo = ReconstructableRepository.for_module(
        'dagster_examples.event_pipeline_demo', pipeline_name
    )

    # pylint: disable=redefined-outer-name
    def test_airflowized_event_pipeline(self, dagster_airflow_python_operator_pipeline):
        pass

def test_s3_storage(
    dagster_airflow_python_operator_pipeline,
):  # pylint: disable=redefined-outer-name
    pipeline_name = 'demo_pipeline'
    environments_path = test_project_environments_path()
    results = dagster_airflow_python_operator_pipeline(
        pipeline_name=pipeline_name,
        handle=ReconstructableRepository.for_module('test_pipelines.repo', pipeline_name),
        environment_yaml=[
            os.path.join(environments_path, 'env.yaml'),
            os.path.join(environments_path, 'env_s3.yaml'),
        ],
    )
    validate_pipeline_execution(results)

def test_smoke_app():
    flask_app = app.create_app_with_reconstructable_repo(
        ReconstructableRepository.for_module(
            module='dagster_examples.intro_tutorial.repos', fn_name='define_repo'
        ),
        DagsterInstance.ephemeral(),
    )
    client = flask_app.test_client()

    result = client.post(
        '/graphql',
        data={
            'query': 'query { pipelinesOrError { ... on PipelineConnection { nodes { name } } } }'
        },
    )
    data = json.loads(result.data.decode('utf-8'))
    assert len(data['data']['pipelinesOrError']['nodes']) == 2
    assert {
        node_data['name'] for node_data in data['data']['pipelinesOrError']['nodes']
    } == set(['hello_cereal_pipeline', 'complex_pipeline'])

    result = client.get('/graphql')
    assert result.status_code == 400
    data = json.loads(result.data.decode('utf-8'))
    assert len(data['errors']) == 1
    assert data['errors'][0]['message'] == 'Must provide query string.'

    result = client.get('/dagit/notebook?path=foo.bar')
    assert result.status_code == 400
    assert result.data.decode('utf-8') == 'Invalid Path'

    result = client.post('/graphql', data={'query': 'query { version { slkjd } }'})
    data = json.loads(result.data.decode('utf-8'))
    assert 'errors' in data
    assert len(data['errors']) == 1
    assert 'must not have a sub selection' in data['errors'][0]['message']

    # Missing routes return the index.html file of the Dagit react app, so the user
    # gets our UI when they navigate to "synthetic" react router URLs.
    result = client.get('static/foo/bar')
    assert result.status_code == 200
    assert "You need to enable JavaScript to run this app." in result.data.decode('utf-8')

    result = client.get('pipelines/foo')
    assert result.status_code == 200
    assert "You need to enable JavaScript to run this app." in result.data.decode('utf-8')

def test_reconstructable_cli_args():
    recon_file = ReconstructableRepository.for_file(
        'foo_file', 'bar_function', '/path/to/working_dir'
    )
    assert recon_file.get_cli_args() == '-f {foo_file} -a bar_function -d /path/to/working_dir'.format(
        foo_file=os.path.abspath(os.path.expanduser('foo_file'))
    )

    recon_file = ReconstructableRepository.for_file('foo_file', 'bar_function')
    assert recon_file.get_cli_args() == '-f {foo_file} -a bar_function -d {working_dir}'.format(
        foo_file=os.path.abspath(os.path.expanduser('foo_file')), working_dir=os.getcwd()
    )

    recon_module = ReconstructableRepository.for_module('foo_module', 'bar_function')
    assert recon_module.get_cli_args() == '-m foo_module -a bar_function'

def test_reconstructable_cli_args():
    recon_file = ReconstructableRepository.for_file(
        "foo_file", "bar_function", "/path/to/working_dir"
    )
    assert recon_file.get_cli_args() == "-f {foo_file} -a bar_function -d {working_directory}".format(
        foo_file=os.path.abspath(os.path.expanduser("foo_file")),
        working_directory=os.path.abspath(os.path.expanduser("/path/to/working_dir")),
    )

    recon_file = ReconstructableRepository.for_file("foo_file", "bar_function")
    assert recon_file.get_cli_args() == "-f {foo_file} -a bar_function -d {working_dir}".format(
        foo_file=os.path.abspath(os.path.expanduser("foo_file")), working_dir=os.getcwd()
    )

    recon_module = ReconstructableRepository.for_module("foo_module", "bar_function")
    assert recon_module.get_cli_args() == "-m foo_module -a bar_function"

def test_skip_operator(
    dagster_airflow_docker_operator_pipeline, dagster_docker_image
):  # pylint: disable=redefined-outer-name
    pipeline_name = 'optional_outputs'
    environments_path = test_project_environments_path()
    results = dagster_airflow_docker_operator_pipeline(
        pipeline_name=pipeline_name,
        handle=ReconstructableRepository.for_module('test_pipelines.repo', pipeline_name),
        environment_yaml=[os.path.join(environments_path, 'env_filesystem.yaml')],
        op_kwargs={'host_tmp_dir': '/tmp'},
        image=dagster_docker_image,
    )
    validate_skip_pipeline_execution(results)

def test_fs_storage_no_explicit_base_dir(
    dagster_airflow_python_operator_pipeline,
):  # pylint: disable=redefined-outer-name
    pipeline_name = 'demo_pipeline'
    environments_path = test_project_environments_path()
    results = dagster_airflow_python_operator_pipeline(
        pipeline_name=pipeline_name,
        recon_repo=ReconstructableRepository.for_module(
            'dagster_test.test_project.test_pipelines.repo', pipeline_name
        ),
        environment_yaml=[
            os.path.join(environments_path, 'env.yaml'),
            os.path.join(environments_path, 'env_filesystem_no_explicit_base_dir.yaml'),
        ],
    )
    validate_pipeline_execution(results)

def test_skip_operator(
    dagster_airflow_docker_operator_pipeline, dagster_docker_image
):  # pylint: disable=redefined-outer-name
    pipeline_name = "optional_outputs"
    environments_path = test_project_environments_path()
    results = dagster_airflow_docker_operator_pipeline(
        pipeline_name=pipeline_name,
        recon_repo=ReconstructableRepository.for_module(
            "dagster_test.test_project.test_pipelines.repo", "define_demo_execution_repo",
        ),
        environment_yaml=[os.path.join(environments_path, "env_filesystem.yaml")],
        op_kwargs={"host_tmp_dir": "/tmp"},
        image=dagster_docker_image,
    )
    validate_skip_pipeline_execution(results)

def test_error_dag_python():  # pylint: disable=redefined-outer-name
    pipeline_name = 'demo_error_pipeline'
    recon_repo = ReconstructableRepository.for_module('test_pipelines.repo', pipeline_name)
    environments_path = test_project_environments_path()
    environment_yaml = [
        os.path.join(environments_path, 'env_filesystem.yaml'),
    ]
    run_config = load_yaml_from_glob_list(environment_yaml)
    execution_date = timezone.utcnow()

    dag, tasks = make_airflow_dag_for_recon_repo(recon_repo, pipeline_name, run_config)

    with pytest.raises(AirflowException) as exc_info:
        execute_tasks_in_dag(
            dag, tasks, run_id=make_new_run_id(), execution_date=execution_date
        )

    assert 'Exception: Unusual error' in str(exc_info.value)

def test_gcs_storage(
    dagster_airflow_docker_operator_pipeline, dagster_docker_image,
):  # pylint: disable=redefined-outer-name
    pipeline_name = 'demo_pipeline_gcs'
    environments_path = test_project_environments_path()
    results = dagster_airflow_docker_operator_pipeline(
        pipeline_name=pipeline_name,
        recon_repo=ReconstructableRepository.for_module(
            'test_pipelines.repo', 'define_demo_execution_repo'
        ),
        environment_yaml=[
            os.path.join(environments_path, 'env.yaml'),
            os.path.join(environments_path, 'env_gcs.yaml'),
        ],
        image=dagster_docker_image,
    )
    validate_pipeline_execution(results)

def test_s3_storage(
    dagster_airflow_docker_operator_pipeline, dagster_docker_image
):  # pylint: disable=redefined-outer-name
    pipeline_name = "demo_pipeline"
    environments_path = test_project_environments_path()
    results = dagster_airflow_docker_operator_pipeline(
        pipeline_name=pipeline_name,
        recon_repo=ReconstructableRepository.for_module(
            "dagster_test.test_project.test_pipelines.repo", "define_demo_execution_repo",
        ),
        environment_yaml=[
            os.path.join(environments_path, "env.yaml"),
            os.path.join(environments_path, "env_s3.yaml"),
        ],
        image=dagster_docker_image,
    )
    validate_pipeline_execution(results)