def test_get_or_create_run():
    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        run = PipelineRun.create_empty_run('foo_pipeline', 'bar_run')
        assert instance.get_or_create_run(run) == run
        assert instance.has_run(run.run_id)
        assert instance.get_or_create_run(run) == run

    # Run is created after we check whether it exists
    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        run = PipelineRun.create_empty_run('foo_pipeline', 'bar_run')

        def _has_run(self, run_id):
            # This is uglier than we would like because there is no nonlocal keyword in py2
            global MOCK_HAS_RUN_CALLED  # pylint: disable=global-statement
            # pylint: disable=protected-access
            if not self._run_storage.has_run(run_id) and not MOCK_HAS_RUN_CALLED:
                self._run_storage.add_run(PipelineRun.create_empty_run('foo_pipeline', run_id))
                return False
            else:
                return self._run_storage.has_run(run_id)

        instance.has_run = types.MethodType(_has_run, instance)

        assert instance.get_or_create_run(run) == run

    # Run is created after we check whether it exists, but deleted before we can get it
    global MOCK_HAS_RUN_CALLED  # pylint:disable=global-statement
    MOCK_HAS_RUN_CALLED = False
    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        run = PipelineRun.create_empty_run('foo_pipeline', 'bar_run')

        def _has_run(self, run_id):
            global MOCK_HAS_RUN_CALLED  # pylint: disable=global-statement
            # pylint: disable=protected-access
            if not self._run_storage.has_run(run_id) and not MOCK_HAS_RUN_CALLED:
                self._run_storage.add_run(PipelineRun.create_empty_run('foo_pipeline', run_id))
                MOCK_HAS_RUN_CALLED = True
                return False
            elif self._run_storage.has_run(run_id) and MOCK_HAS_RUN_CALLED:
                MOCK_HAS_RUN_CALLED = False
                return True
            else:
                return False

        instance.has_run = types.MethodType(_has_run, instance)

        with pytest.raises(check.CheckError, match='Inconsistent run storage'):
            instance.get_or_create_run(run)
def test_two_runs_running():
    run_id_one = make_new_run_id()
    run_id_two = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'infinite_loop_pipeline')

    with safe_tempfile_path() as file_one, safe_tempfile_path() as file_two:
        instance = DagsterInstance.local_temp()

        execution_manager = SubprocessExecutionManager(instance)

        pipeline_run_one = instance.create_run(
            PipelineRun.create_empty_run(
                pipeline_name=infinite_loop_pipeline.name,
                run_id=run_id_one,
                environment_dict={'solids': {'loop': {'config': {'file': file_one}}}},
            )
        )
        execution_manager.execute_pipeline(
            handle, infinite_loop_pipeline, pipeline_run_one, instance
        )

        pipeline_run_two = instance.create_run(
            PipelineRun.create_empty_run(
                pipeline_name=infinite_loop_pipeline.name,
                run_id=run_id_two,
                environment_dict={'solids': {'loop': {'config': {'file': file_two}}}},
            )
        )
        execution_manager.execute_pipeline(
            handle, infinite_loop_pipeline, pipeline_run_two, instance
        )

        # ensure both runs have begun execution; keep waiting while either file is still missing
        while not os.path.exists(file_one) or not os.path.exists(file_two):
            time.sleep(0.1)

        assert execution_manager.is_process_running(run_id_one)
        assert execution_manager.is_process_running(run_id_two)

        assert execution_manager.terminate(run_id_one)

        assert not execution_manager.is_process_running(run_id_one)
        assert execution_manager.is_process_running(run_id_two)

        assert execution_manager.terminate(run_id_two)

        assert not execution_manager.is_process_running(run_id_one)
        assert not execution_manager.is_process_running(run_id_two)
def test_roundtrip_run():
    run = PipelineRun(
        pipeline_name='pipey_mcpipeface',
        run_id='8675309',
        environment_dict={'good': True},
        mode='default',
        selector=ExecutionSelector('pipey_mcpipeface'),
        step_keys_to_execute=['step_1', 'step_2', 'step_3'],
        tags={'tag_it': 'bag_it'},
        status=PipelineRunStatus.NOT_STARTED,
        previous_run_id='previousID',
    )
    for field in run:
        # ensure we have a test value to round trip for each field
        assert field

    exec_params = execution_params_from_pipeline_run(run)
    assert run == pipeline_run_from_execution_params(exec_params)

    exec_params_gql = execution_params_from_graphql(exec_params.to_graphql_input())
    assert exec_params_gql == exec_params
    assert run == pipeline_run_from_execution_params(exec_params_gql)

    empty_run = PipelineRun.create_empty_run('foo', 'bar')
    exec_params = execution_params_from_pipeline_run(empty_run)
    assert empty_run == pipeline_run_from_execution_params(exec_params)

    exec_params_gql = execution_params_from_graphql(exec_params.to_graphql_input())
    assert exec_params_gql == exec_params
    assert empty_run == pipeline_run_from_execution_params(exec_params_gql)
def test_max_concurrency_zero():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'infinite_loop_pipeline')

    with safe_tempfile_path() as filepath:
        instance = DagsterInstance.local_temp()
        execution_manager = QueueingSubprocessExecutionManager(instance, max_concurrent_runs=0)

        pipeline_run = instance.create_run(
            PipelineRun.create_empty_run(
                pipeline_name=infinite_loop_pipeline.name,
                run_id=run_id,
                environment_dict={'solids': {'loop': {'config': {'file': filepath}}}},
            )
        )
        execution_manager.execute_pipeline(handle, infinite_loop_pipeline, pipeline_run, instance)
        assert not execution_manager.is_active(run_id)
        assert not os.path.exists(filepath)
def test_single():
    instance = DagsterInstance.local_temp()
    pipeline_name = 'foo_pipeline'
    run_id = make_new_run_id()
    pipeline_run = PipelineRun.create_empty_run(pipeline_name, run_id)
    step_keys = ['A', 'B', 'C']

    with instance.compute_log_manager.watch(pipeline_run):
        print('outer 1')
        print('outer 2')
        print('outer 3')

        for step_key in step_keys:
            inner_step(instance, pipeline_run, step_key)

    for step_key in step_keys:
        stdout = instance.compute_log_manager.read_logs_file(
            run_id, step_key, ComputeIOType.STDOUT
        )
        assert normalize_file_content(stdout.data) == expected_inner_output(step_key)

    full_out = instance.compute_log_manager.read_logs_file(
        run_id, pipeline_name, ComputeIOType.STDOUT
    )
    assert normalize_file_content(full_out.data).startswith(expected_outer_prefix())
def test_multi():
    instance = DagsterInstance.local_temp()
    pipeline_name = 'foo_pipeline'
    run_id = make_new_run_id()
    pipeline_run = PipelineRun.create_empty_run(pipeline_name, run_id)
    context = get_multiprocessing_context()
    step_keys = ['A', 'B', 'C']

    with instance.compute_log_manager.watch(pipeline_run):
        print('outer 1')
        print('outer 2')
        print('outer 3')

        for step_key in step_keys:
            process = context.Process(
                target=execute_inner, args=(step_key, pipeline_run, instance.get_ref())
            )
            process.start()
            process.join()

    for step_key in step_keys:
        stdout = instance.compute_log_manager.read_logs_file(
            run_id, step_key, ComputeIOType.STDOUT
        )
        assert normalize_file_content(stdout.data) == expected_inner_output(step_key)

    full_out = instance.compute_log_manager.read_logs_file(
        run_id, pipeline_name, ComputeIOType.STDOUT
    )

    # The way that the multiprocess compute-logging interacts with pytest (which stubs out the
    # sys.stdout fileno) makes this difficult to test. The pytest-captured stdout only captures
    # the stdout from the outer process, not also the inner process.
    assert normalize_file_content(full_out.data).startswith(expected_outer_prefix())
def test_s3_pipeline_with_custom_prefix(s3_bucket):
    run_id = make_new_run_id()
    s3_prefix = 'custom_prefix'

    pipe = define_inty_pipeline(should_throw=False)
    environment_dict = {
        'storage': {'s3': {'config': {'s3_bucket': s3_bucket, 's3_prefix': s3_prefix}}}
    }

    pipeline_run = PipelineRun.create_empty_run(
        pipe.name, run_id=run_id, environment_dict=environment_dict
    )
    instance = DagsterInstance.ephemeral()

    result = execute_pipeline(
        pipe, environment_dict=environment_dict, run_config=RunConfig(run_id=run_id),
    )
    assert result.success

    execution_plan = create_execution_plan(pipe, environment_dict, RunConfig(run_id=run_id))
    with scoped_pipeline_context(
        pipe, environment_dict, pipeline_run, instance, execution_plan
    ) as context:
        store = S3IntermediateStore(
            run_id=run_id,
            s3_bucket=s3_bucket,
            s3_prefix=s3_prefix,
            s3_session=context.scoped_resources_builder.build(required_resource_keys={'s3'}).s3,
        )
        assert store.root == '/'.join(['custom_prefix', 'storage', run_id])
        assert store.get_intermediate(context, 'return_one.compute', Int).obj == 1
        assert store.get_intermediate(context, 'add_one.compute', Int).obj == 2
def test_execution_plan_for_composite_solid():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid': {
                'solids': {'node_a': {'config': {'foo': 'baz'}}, 'node_b': {'config': {'bar': 3}}}
            }
        }
    }
    execution_plan = create_execution_plan(composite_pipeline, environment_dict=environment_dict)
    pipeline_run = PipelineRun.create_empty_run(composite_pipeline.name, make_new_run_id())

    events = execute_plan(
        execution_plan,
        environment_dict=environment_dict,
        pipeline_run=pipeline_run,
        instance=DagsterInstance.ephemeral(),
    )

    assert [e.event_type_value for e in events] == [
        'STEP_START',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'STEP_START',
        'STEP_INPUT',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
    ]
def test_single_write_read_with_snapshot(self, storage):
    if not isinstance(storage, InMemoryRunStorage):
        pytest.skip()

    run_with_snapshot_id = 'lkasjdflkjasdf'
    pipeline_def = PipelineDefinition(name='some_pipeline', solid_defs=[])

    pipeline_snapshot = pipeline_def.get_pipeline_snapshot()
    pipeline_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)

    run_with_snapshot = PipelineRun.create_empty_run(
        run_id=run_with_snapshot_id,
        pipeline_name=pipeline_def.name,
        pipeline_snapshot_id=pipeline_snapshot_id,
    )

    assert not storage.has_pipeline_snapshot(pipeline_snapshot_id)

    assert storage.add_pipeline_snapshot(pipeline_snapshot) == pipeline_snapshot_id
    assert storage.get_pipeline_snapshot(pipeline_snapshot_id) == pipeline_snapshot

    storage.add_run(run_with_snapshot)
    assert storage.get_run_by_id(run_with_snapshot_id) == run_with_snapshot

    storage.wipe()

    assert not storage.has_pipeline_snapshot(pipeline_snapshot_id)
    assert not storage.has_run(run_with_snapshot_id)
def test_valid_job_format(kubeconfig, docker_image, image_pull_policy):  # pylint: disable=redefined-outer-name
    run_id = uuid.uuid4().hex
    environment_dict = load_yaml_from_path(os.path.join(environments_path(), 'env.yaml'))
    pipeline_name = 'demo_pipeline'
    run = PipelineRun.create_empty_run(pipeline_name, run_id, environment_dict)

    run_launcher = K8sRunLauncher(
        image_pull_policy=image_pull_policy,
        image_pull_secrets=[{'name': 'element-dev-key'}],
        service_account_name='dagit-admin',
        instance_config_map='dagster-instance',
        job_image=docker_image,
        load_kubeconfig=True,
        kubeconfig_file=kubeconfig,
    )

    job = run_launcher.construct_job(run)

    assert (
        yaml.dump(remove_none_recursively(job.to_dict()), default_flow_style=False).strip()
        == EXPECTED_JOB_SPEC.format(
            run_id=run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy,
            dagster_version=dagster_version,
        ).strip()
    )
def test_execution_plan_simple_two_steps():
    pipeline_def = define_two_int_pipeline()
    execution_plan = create_execution_plan(pipeline_def)

    assert isinstance(execution_plan.steps, list)
    assert len(execution_plan.steps) == 2
    assert execution_plan.get_step_by_key('return_one.compute')
    assert execution_plan.get_step_by_key('add_one.compute')

    pipeline_run = PipelineRun.create_empty_run(pipeline_def.name, make_new_run_id())
    events = execute_plan(
        execution_plan, pipeline_run=pipeline_run, instance=DagsterInstance.ephemeral()
    )

    # start, out, success, start, input, out, success
    assert [e.event_type_value for e in events] == [
        'STEP_START',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'STEP_START',
        'STEP_INPUT',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
    ]

    output_events = [e for e in events if e.event_type_value == 'STEP_OUTPUT']

    assert output_events[0].step_key == 'return_one.compute'
    assert output_events[0].is_successful_output
    assert output_events[1].step_key == 'add_one.compute'
    assert output_events[1].is_successful_output
def test_execute_step_wrong_step_key():
    pipeline = define_inty_pipeline()
    instance = DagsterInstance.ephemeral()

    execution_plan = create_execution_plan(pipeline)
    pipeline_run = PipelineRun.create_empty_run(pipeline.name, make_new_run_id())

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(
            execution_plan.build_subset_plan(['nope']), instance, pipeline_run=pipeline_run
        )

    assert exc_info.value.step_keys == ['nope']
    assert str(exc_info.value) == 'Execution plan does not contain step: nope'

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(
            execution_plan.build_subset_plan(['nope', 'nuh_uh']),
            instance,
            pipeline_run=pipeline_run,
        )

    assert exc_info.value.step_keys == ['nope', 'nuh_uh']
    assert str(exc_info.value) == 'Execution plan does not contain steps: nope, nuh_uh'
def test_k8s_run_launcher_celery(dagster_instance):  # pylint: disable=redefined-outer-name
    run_id = uuid.uuid4().hex
    environment_dict = merge_dicts(
        merge_yamls(
            [
                os.path.join(environments_path(), 'env.yaml'),
                os.path.join(environments_path(), 'env_filesystem.yaml'),
            ]
        ),
        get_celery_engine_config(),
    )

    assert 'celery' in environment_dict['execution']

    pipeline_name = 'demo_pipeline_celery'
    tags = {'key': 'value'}
    run = PipelineRun.create_empty_run(pipeline_name, run_id, environment_dict, tags)

    dagster_instance.launch_run(run)
    success, raw_logs = wait_for_job_success('dagster-job-%s' % run_id)
    result = parse_raw_res(raw_logs.split('\n'))

    assert success
    assert not result.get('errors')
    assert result['data']
    # This is a weak test, but it proves that we have celery configured properly; getting the
    # run to succeed would involve relying on s3 / gcs for storage.
    assert result['data']['startPipelineExecutionForCreatedRun']['__typename'] == 'PythonError'
    assert (
        'Must use S3 or GCS storage with non-local Celery broker: pyamqp://test:test@dagster-rabbitmq:5672// and backend: amqp'
        in result['data']['startPipelineExecutionForCreatedRun']['message']
    )
def create_test_pipeline_execution_context(logger_defs=None):
    run_id = make_new_run_id()
    loggers = check.opt_dict_param(
        logger_defs, 'logger_defs', key_type=str, value_type=LoggerDefinition
    )
    mode_def = ModeDefinition(logger_defs=loggers)
    pipeline_def = PipelineDefinition(
        name='test_legacy_context', solid_defs=[], mode_defs=[mode_def]
    )
    environment_dict = {'loggers': {key: {} for key in loggers}}
    pipeline_run = PipelineRun.create_empty_run('test_legacy_context', run_id, environment_dict)
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline_def, environment_dict, pipeline_run)
    creation_data = create_context_creation_data(
        pipeline_def, environment_dict, pipeline_run, instance, execution_plan
    )
    log_manager = create_log_manager(creation_data)
    scoped_resources_builder = ScopedResourcesBuilder()
    executor_config = create_executor_config(creation_data)
    return construct_pipeline_execution_context(
        context_creation_data=creation_data,
        scoped_resources_builder=scoped_resources_builder,
        system_storage_data=SystemStorageData(
            intermediates_manager=InMemoryIntermediatesManager(),
            file_manager=LocalFileManager.for_instance(instance, run_id),
        ),
        log_manager=log_manager,
        executor_config=executor_config,
        raise_on_error=True,
    )
def test_has_run_query_and_terminate():
    run_id_one = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'infinite_loop_pipeline')

    instance = DagsterInstance.local_temp()

    with safe_tempfile_path() as path:
        pipeline_run = instance.create_run(
            PipelineRun.create_empty_run(
                pipeline_name=infinite_loop_pipeline.name,
                run_id=run_id_one,
                environment_dict={'solids': {'loop': {'config': {'file': path}}}},
            )
        )
        execution_manager = SubprocessExecutionManager(instance)
        execution_manager.execute_pipeline(handle, infinite_loop_pipeline, pipeline_run, instance)

        while not os.path.exists(path):
            time.sleep(0.1)

        assert os.path.exists(path)

        assert execution_manager.is_process_running(run_id_one)
        assert execution_manager.terminate(run_id_one)
        assert instance.get_run_by_id(run_id_one).is_finished
        assert not execution_manager.is_process_running(run_id_one)
        assert not execution_manager.terminate(run_id_one)

    assert not os.path.exists(path)
def test_gcs_pipeline_with_custom_prefix(gcs_bucket):
    run_id = str(uuid.uuid4())
    gcs_prefix = 'custom_prefix'

    pipe = define_inty_pipeline(should_throw=False)
    environment_dict = {
        'storage': {'gcs': {'config': {'gcs_bucket': gcs_bucket, 'gcs_prefix': gcs_prefix}}}
    }

    pipeline_run = PipelineRun.create_empty_run(
        pipe.name, run_id=run_id, environment_dict=environment_dict
    )
    instance = DagsterInstance.ephemeral()

    result = execute_pipeline(
        pipe, environment_dict=environment_dict, run_config=RunConfig(run_id=run_id),
    )
    assert result.success

    execution_plan = create_execution_plan(
        pipe, environment_dict, run_config=RunConfig(run_id=run_id)
    )
    with scoped_pipeline_context(
        pipe, environment_dict, pipeline_run, instance, execution_plan
    ) as context:
        store = GCSIntermediateStore(
            run_id=run_id,
            gcs_bucket=gcs_bucket,
            gcs_prefix=gcs_prefix,
            client=context.scoped_resources_builder.build(
                mapper_fn=SolidInvocation.default_resource_mapper_fn,
                required_resource_keys={'gcs'},
            ).gcs.client,
        )
        assert store.root == '/'.join(['custom_prefix', 'storage', run_id])
        assert store.get_intermediate(context, 'return_one.compute', Int).obj == 1
        assert store.get_intermediate(context, 'add_one.compute', Int).obj == 2
def test_execution_plan_for_composite_solid_with_config_mapping():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {'foo': 'baz', 'bar': 3}
            }
        }
    }
    execution_plan = create_execution_plan(
        composite_pipeline_with_config_mapping, environment_dict=environment_dict
    )
    pipeline_run = PipelineRun.create_empty_run(
        composite_pipeline_with_config_mapping.name, str(uuid.uuid4())
    )

    events = execute_plan(
        execution_plan,
        environment_dict=environment_dict,
        pipeline_run=pipeline_run,
        instance=DagsterInstance.ephemeral(),
    )

    assert [e.event_type_value for e in events] == [
        'ENGINE_EVENT',
        'STEP_START',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'STEP_START',
        'STEP_INPUT',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'ENGINE_EVENT',
    ]
def yield_empty_pipeline_context(run_id=None, instance=None):
    with scoped_pipeline_context(
        PipelineDefinition([]),
        {},
        PipelineRun.create_empty_run('empty', run_id=run_id),
        instance or DagsterInstance.ephemeral(),
    ) as context:
        yield context
def test_get_or_create_run():
    instance = DagsterInstance.ephemeral()

    assert instance.get_runs() == []
    pipeline_run = PipelineRun.create_empty_run('foo_pipeline', 'new_run')
    assert instance.get_or_create_run(pipeline_run) == pipeline_run
    assert instance.get_runs() == [pipeline_run]

    assert instance.get_or_create_run(pipeline_run) == pipeline_run
    assert instance.get_runs() == [pipeline_run]

    conflicting_pipeline_run = PipelineRun.create_empty_run('bar_pipeline', 'new_run')
    with pytest.raises(DagsterRunConflict, match='Found conflicting existing run with same id.'):
        instance.get_or_create_run(conflicting_pipeline_run)
def test_using_s3_for_subplan(s3_bucket):
    pipeline_def = define_inty_pipeline()

    environment_dict = {'storage': {'s3': {'config': {'s3_bucket': s3_bucket}}}}

    run_id = str(uuid.uuid4())

    execution_plan = create_execution_plan(
        pipeline_def, environment_dict=environment_dict, run_config=RunConfig(run_id=run_id)
    )

    assert execution_plan.get_step_by_key('return_one.compute')

    step_keys = ['return_one.compute']
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun.create_empty_run(
        pipeline_def.name, run_id=run_id, environment_dict=environment_dict
    )

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, 'return_one.compute')
    with scoped_pipeline_context(
        pipeline_def, environment_dict, pipeline_run, instance
    ) as context:
        store = S3IntermediateStore(
            s3_bucket, run_id, s3_session=context.scoped_resources_builder.build().s3.session
        )
        assert store.has_intermediate(context, 'return_one.compute')
        assert store.get_intermediate(context, 'return_one.compute', Int).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(['add_one.compute']),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(add_one_step_events, 'add_one.compute')
    with scoped_pipeline_context(
        pipeline_def, environment_dict, pipeline_run, instance
    ) as context:
        assert store.has_intermediate(context, 'add_one.compute')
        assert store.get_intermediate(context, 'add_one.compute', Int).obj == 2
def test_write_conflicting_run_id(self, storage):
    double_run_id = 'double_run_id'
    pipeline_def = PipelineDefinition(name='some_pipeline', solid_defs=[])

    run = PipelineRun.create_empty_run(run_id=double_run_id, pipeline_name=pipeline_def.name)

    assert storage.add_run(run)
    with pytest.raises(DagsterRunAlreadyExists):
        storage.add_run(run)
def test_handle_run_event_pipeline_success_test():
    run_storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())

    run_id = str(uuid.uuid4())
    run_to_add = PipelineRun.create_empty_run(pipeline_name='pipeline_name', run_id=run_id)
    run_storage.add_run(run_to_add)

    dagster_pipeline_start_event = DagsterEvent(
        message='a message',
        event_type_value=DagsterEventType.PIPELINE_START.value,
        pipeline_name='pipeline_name',
        step_key=None,
        solid_handle=None,
        step_kind_value=None,
        logging_tags=None,
    )

    run_storage.handle_run_event(run_id, dagster_pipeline_start_event)

    assert run_storage.get_run_by_id(run_id).status == PipelineRunStatus.STARTED

    run_storage.handle_run_event(
        str(uuid.uuid4()),  # diff run
        DagsterEvent(
            message='a message',
            event_type_value=DagsterEventType.PIPELINE_SUCCESS.value,
            pipeline_name='pipeline_name',
            step_key=None,
            solid_handle=None,
            step_kind_value=None,
            logging_tags=None,
        ),
    )

    assert run_storage.get_run_by_id(run_id).status == PipelineRunStatus.STARTED

    run_storage.handle_run_event(
        run_id,  # correct run
        DagsterEvent(
            message='a message',
            event_type_value=DagsterEventType.PIPELINE_SUCCESS.value,
            pipeline_name='pipeline_name',
            step_key=None,
            solid_handle=None,
            step_kind_value=None,
            logging_tags=None,
        ),
    )

    assert run_storage.get_run_by_id(run_id).status == PipelineRunStatus.SUCCESS
def yield_empty_pipeline_context(run_id=None, instance=None):
    pipeline = PipelineDefinition([])
    with scoped_pipeline_context(
        pipeline,
        {},
        PipelineRun.create_empty_run('empty', run_id=run_id if run_id is not None else 'TESTING'),
        instance or DagsterInstance.ephemeral(),
        create_execution_plan(pipeline),
    ) as context:
        yield context
def test_valid_job_format(run_launcher, docker_image, environments_path):  # pylint: disable=redefined-outer-name
    run_id = uuid.uuid4().hex
    environment_dict = load_yaml_from_path(os.path.join(environments_path, 'env.yaml'))
    pipeline_name = 'demo_pipeline'
    run = PipelineRun.create_empty_run(pipeline_name, run_id, environment_dict)

    job = run_launcher.construct_job(run)

    assert (
        yaml.dump(remove_none_recursively(job.to_dict()), default_flow_style=False).strip()
        == EXPECTED_JOB_SPEC.format(run_id=run_id, job_image=docker_image).strip()
    )
def test_k8s_run_launcher(dagster_instance):  # pylint: disable=redefined-outer-name
    run_id = uuid.uuid4().hex
    environment_dict = load_yaml_from_path(os.path.join(environments_path(), 'env.yaml'))
    pipeline_name = 'demo_pipeline'
    run = PipelineRun.create_empty_run(pipeline_name, run_id, environment_dict)

    dagster_instance.launch_run(run)
    success, raw_logs = wait_for_job_success('dagster-job-%s' % run_id)
    result = parse_raw_res(raw_logs.split('\n'))

    assert success
    assert not result.get('errors')
    assert result['data']
    assert result['data']['startPipelineExecution']['__typename'] == 'StartPipelineExecutionSuccess'
def test_wipe_tags(self, storage):
    run_id = 'some_run_id'
    run = PipelineRun.create_empty_run(
        run_id=run_id, pipeline_name='a_pipeline', tags={'foo': 'bar'}
    )

    storage.add_run(run)

    assert storage.get_run_by_id(run_id) == run
    assert dict(storage.get_run_tags()) == {'foo': {'bar'}}

    storage.wipe()
    assert list(storage.get_runs()) == []
    assert dict(storage.get_run_tags()) == {}
def test_using_file_system_for_subplan_missing_input():
    pipeline = define_inty_pipeline()
    environment_dict = {'storage': {'filesystem': {}}}

    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)
    pipeline_run = PipelineRun.create_empty_run(pipeline.name, make_new_run_id())

    with pytest.raises(DagsterStepOutputNotFoundError):
        execute_plan(
            execution_plan.build_subset_plan(['add_one.compute']),
            DagsterInstance.ephemeral(),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
        )
def test_nuke():
    storage = SqliteRunStorage.mem()
    assert storage

    run_id = str(uuid.uuid4())
    storage.add_run(PipelineRun.create_empty_run(run_id=run_id, pipeline_name='some_pipeline'))
    assert len(storage.all_runs) == 1

    storage.wipe()
    assert list(storage.all_runs) == []