def start_pipeline_execution(graphene_info, execution_params, reexecution_config):
    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    check.inst_param(execution_params, 'execution_params', ExecutionParams)
    check.opt_inst_param(reexecution_config, 'reexecution_config', ReexecutionConfig)

    instance = graphene_info.context.instance

    dauphin_pipeline = get_dauphin_pipeline_from_selector(graphene_info, execution_params.selector)

    get_validated_config(
        graphene_info,
        dauphin_pipeline,
        environment_dict=execution_params.environment_dict,
        mode=execution_params.mode,
    )

    execution_plan = create_execution_plan(
        dauphin_pipeline.get_dagster_pipeline(),
        execution_params.environment_dict,
        run_config=RunConfig(mode=execution_params.mode),
    )

    _check_start_pipeline_execution_errors(
        graphene_info, execution_params, execution_plan, reexecution_config
    )

    run = instance.create_run(
        PipelineRun(
            pipeline_name=dauphin_pipeline.get_dagster_pipeline().name,
            run_id=execution_params.execution_metadata.run_id
            if execution_params.execution_metadata.run_id
            else make_new_run_id(),
            selector=execution_params.selector,
            environment_dict=execution_params.environment_dict,
            mode=execution_params.mode,
            reexecution_config=reexecution_config,
            step_keys_to_execute=execution_params.step_keys,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )

    graphene_info.context.execution_manager.execute_pipeline(
        graphene_info.context.get_handle(),
        dauphin_pipeline.get_dagster_pipeline(),
        run,
        raise_on_error=graphene_info.context.raise_on_error,
        instance=instance,
    )

    return graphene_info.schema.type_named('StartPipelineExecutionSuccess')(
        run=graphene_info.schema.type_named('PipelineRun')(run)
    )
def create_valid_pipeline_run(graphene_info, external_pipeline, execution_params):
    # Resolve the mode: error when ambiguous, default when there is exactly one.
    if execution_params.mode is None and len(external_pipeline.available_modes) > 1:
        raise UserFacingGraphQLError(
            GrapheneNoModeProvidedError(external_pipeline.name, external_pipeline.available_modes)
        )
    elif execution_params.mode is None and len(external_pipeline.available_modes) == 1:
        mode = external_pipeline.available_modes[0]
    else:
        mode = execution_params.mode

    ensure_valid_config(external_pipeline, mode, execution_params.run_config)

    step_keys_to_execute, known_state = compute_step_keys_to_execute(
        graphene_info, execution_params
    )

    external_execution_plan = get_external_execution_plan_or_raise(
        graphene_info=graphene_info,
        external_pipeline=external_pipeline,
        mode=mode,
        run_config=execution_params.run_config,
        step_keys_to_execute=step_keys_to_execute,
        known_state=known_state,
    )
    tags = merge_dicts(external_pipeline.tags, execution_params.execution_metadata.tags)

    pipeline_run = graphene_info.context.instance.create_run(
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=external_execution_plan.execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        pipeline_name=execution_params.selector.pipeline_name,
        run_id=execution_params.execution_metadata.run_id
        if execution_params.execution_metadata.run_id
        else make_new_run_id(),
        solid_selection=execution_params.selector.solid_selection,
        solids_to_execute=frozenset(execution_params.selector.solid_selection)
        if execution_params.selector.solid_selection
        else None,
        run_config=execution_params.run_config,
        mode=mode,
        step_keys_to_execute=step_keys_to_execute,
        tags=tags,
        root_run_id=execution_params.execution_metadata.root_run_id,
        parent_run_id=execution_params.execution_metadata.parent_run_id,
        status=PipelineRunStatus.NOT_STARTED,
        external_pipeline_origin=external_pipeline.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
    )
    return pipeline_run
def test_listen_notify_filter_run_event(conn_string):
    event_log_storage = PostgresEventLogStorage.create_clean_storage(conn_string)

    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()

    run_id_one = make_new_run_id()
    run_id_two = make_new_run_id()

    # only watch one of the runs
    event_list = []
    event_log_storage.event_watcher.watch_run(run_id_two, 0, event_list.append)

    try:
        events_one, _result_one = synthesize_events(_solids, run_id=run_id_one)
        for event in events_one:
            event_log_storage.store_event(event)

        events_two, _result_two = synthesize_events(_solids, run_id=run_id_two)
        for event in events_two:
            event_log_storage.store_event(event)

        # poll until the watcher has delivered all of run two's events, or time out
        start = time.time()
        while len(event_list) < len(events_two) and time.time() - start < TEST_TIMEOUT:
            pass

        assert len(event_list) == len(events_two)
        # uncomment when https://github.com/dagster-io/dagster/issues/3368 is resolved with structured event
        # assert all([isinstance(event, DagsterEventRecord) for event in event_list])
    finally:
        del event_log_storage
def test_s3_intermediate_storage(mock_s3_bucket):
    run_id = make_new_run_id()
    run_id_2 = make_new_run_id()

    intermediate_storage = S3IntermediateStorage(run_id=run_id, s3_bucket=mock_s3_bucket.name)
    assert intermediate_storage.root == "/".join(["dagster", "storage", run_id])

    intermediate_storage_2 = S3IntermediateStorage(run_id=run_id_2, s3_bucket=mock_s3_bucket.name)
    assert intermediate_storage_2.root == "/".join(["dagster", "storage", run_id_2])

    try:
        with yield_empty_pipeline_context(run_id=run_id) as context:
            intermediate_storage.set_intermediate(
                context, RuntimeBool, StepOutputHandle("true"), True
            )

            assert intermediate_storage.has_intermediate(context, StepOutputHandle("true"))
            assert (
                intermediate_storage.get_intermediate(
                    context, RuntimeBool, StepOutputHandle("true")
                ).obj
                is True
            )
            assert intermediate_storage.uri_for_paths(["true"]).startswith("s3://")

            intermediate_storage_2.copy_intermediate_from_run(
                context, run_id, StepOutputHandle("true")
            )
            assert intermediate_storage_2.has_intermediate(context, StepOutputHandle("true"))
            assert (
                intermediate_storage_2.get_intermediate(
                    context, RuntimeBool, StepOutputHandle("true")
                ).obj
                is True
            )
    finally:
        intermediate_storage.rm_intermediate(context, StepOutputHandle("true"))
        intermediate_storage_2.rm_intermediate(context, StepOutputHandle("true"))
def test_s3_intermediate_storage(s3_bucket):
    run_id = make_new_run_id()
    run_id_2 = make_new_run_id()

    intermediate_storage = S3IntermediateStorage(run_id=run_id, s3_bucket=s3_bucket)
    assert intermediate_storage.root == '/'.join(['dagster', 'storage', run_id])

    intermediate_storage_2 = S3IntermediateStorage(run_id=run_id_2, s3_bucket=s3_bucket)
    assert intermediate_storage_2.root == '/'.join(['dagster', 'storage', run_id_2])

    try:
        with yield_empty_pipeline_context(run_id=run_id) as context:
            intermediate_storage.set_intermediate(
                context, RuntimeBool, StepOutputHandle('true'), True
            )

            assert intermediate_storage.has_intermediate(context, StepOutputHandle('true'))
            assert (
                intermediate_storage.get_intermediate(
                    context, RuntimeBool, StepOutputHandle('true')
                ).obj
                is True
            )
            assert intermediate_storage.uri_for_paths(['true']).startswith('s3://')

            intermediate_storage_2.copy_intermediate_from_run(
                context, run_id, StepOutputHandle('true')
            )
            assert intermediate_storage_2.has_intermediate(context, StepOutputHandle('true'))
            assert (
                intermediate_storage_2.get_intermediate(
                    context, RuntimeBool, StepOutputHandle('true')
                ).obj
                is True
            )
    finally:
        intermediate_storage.rm_intermediate(context, StepOutputHandle('true'))
        intermediate_storage_2.rm_intermediate(context, StepOutputHandle('true'))
def test_using_file_system_for_subplan_missing_input():
    pipeline = define_inty_pipeline()
    environment_dict = {'storage': {'filesystem': {}}}

    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)
    pipeline_run = PipelineRun.create_empty_run(pipeline.name, make_new_run_id())

    with pytest.raises(DagsterStepOutputNotFoundError):
        execute_plan(
            execution_plan.build_subset_plan(['add_one.compute']),
            DagsterInstance.ephemeral(),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
        )
def test_delete_with_tags(self, storage):
    assert storage
    run_id = make_new_run_id()
    storage.add_run(
        TestRunStorage.build_run(
            run_id=run_id,
            pipeline_name="some_pipeline",
            tags={run_id: run_id},
        )
    )
    assert len(storage.get_runs()) == 1
    assert run_id in [key for key, value in storage.get_run_tags()]
    storage.delete_run(run_id)
    assert list(storage.get_runs()) == []
    assert run_id not in [key for key, value in storage.get_run_tags()]
def test_running():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'passing_pipeline')
    environment_dict = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')

    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=passing_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(
        handle, passing_pipeline, pipeline_run, instance, raise_on_error=False
    )
    execution_manager.join()

    assert instance.get_run(run_id).status == PipelineRunStatus.SUCCESS

    events = instance.all_logs(run_id)
    assert events

    process_start_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_START)
    assert len(process_start_events) == 1

    process_started_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_STARTED)
    assert len(process_started_events) == 1

    process_exited_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_EXITED)
    assert len(process_exited_events) == 1
def test_execution_plan_reexecution_with_in_memory():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    old_run_id = make_new_run_id()
    environment_dict = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}
    result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=RunConfig(run_id=old_run_id),
        instance=instance,
    )
    assert result.success

    # re-execute add_two
    new_run_id = make_new_run_id()
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name,
        run_id=new_run_id,
        environment_dict=environment_dict,
        mode='default',
        previous_run_id=result.run_id,
    )
    execution_plan = create_execution_plan(
        pipeline_def, environment_dict=environment_dict, run_config=pipeline_run
    )

    with pytest.raises(DagsterInvariantViolationError):
        execute_plan(
            execution_plan.build_subset_plan(['add_two.compute']),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        )
def test_fan_out_should_skip_step():
    @solid(
        output_defs=[
            OutputDefinition(Int, "out_1", is_required=False),
            OutputDefinition(Int, "out_2", is_required=False),
            OutputDefinition(Int, "out_3", is_required=False),
        ]
    )
    def foo(_):
        yield Output(1, "out_1")

    @solid
    def bar(_, input_arg):
        return input_arg

    @pipeline
    def optional_outputs():
        foo_res = foo()
        # pylint: disable=no-member
        bar.alias("bar_1")(input_arg=foo_res.out_1)
        bar.alias("bar_2")(input_arg=foo_res.out_2)
        bar.alias("bar_3")(input_arg=foo_res.out_3)

    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name="optional_outputs", run_id=make_new_run_id())
    execute_plan(
        create_execution_plan(optional_outputs, step_keys_to_execute=["foo"]),
        InMemoryPipeline(optional_outputs),
        instance,
        pipeline_run,
    )

    # foo only yielded out_1, so only bar_1 has its input satisfied
    assert not should_skip_step(
        create_execution_plan(optional_outputs, step_keys_to_execute=["bar_1"]),
        instance,
        pipeline_run.run_id,
    )
    assert should_skip_step(
        create_execution_plan(optional_outputs, step_keys_to_execute=["bar_2"]),
        instance,
        pipeline_run.run_id,
    )
    assert should_skip_step(
        create_execution_plan(optional_outputs, step_keys_to_execute=["bar_3"]),
        instance,
        pipeline_run.run_id,
    )
def test_fetch_run_filter(self, storage):
    assert storage
    one = make_new_run_id()
    two = make_new_run_id()

    storage.add_run(
        TestRunStorage.build_run(
            run_id=one,
            pipeline_name="some_pipeline",
            status=PipelineRunStatus.SUCCESS,
        )
    )
    storage.add_run(
        TestRunStorage.build_run(
            run_id=two,
            pipeline_name="some_pipeline",
            status=PipelineRunStatus.SUCCESS,
        ),
    )

    assert len(storage.get_runs()) == 2

    some_runs = storage.get_runs(PipelineRunsFilter(run_ids=[one, two]))
    count = storage.get_runs_count(PipelineRunsFilter(run_ids=[one, two]))
    assert len(some_runs) == 2
    assert count == 2
def test_file_system_intermediate_store():
    run_id = make_new_run_id()
    instance = DagsterInstance.ephemeral()
    intermediate_store = build_fs_intermediate_store(
        instance.intermediates_directory, run_id=run_id
    )

    with yield_empty_pipeline_context(run_id=run_id, instance=instance) as context:
        intermediate_store.set_object(True, context, RuntimeBool, ['true'])
        assert intermediate_store.has_object(context, ['true'])
        assert intermediate_store.get_object(context, RuntimeBool, ['true']).obj is True
        assert intermediate_store.uri_for_paths(['true']).startswith('file:///')
        assert intermediate_store.rm_object(context, ['true']) is None
        assert intermediate_store.rm_object(context, ['true']) is None
        assert intermediate_store.rm_object(context, ['dslkfhjsdflkjfs']) is None
def pipeline_run_from_execution_params(execution_params):
    check.inst_param(execution_params, 'execution_params', ExecutionParams)
    return PipelineRun(
        pipeline_name=execution_params.selector.name,
        run_id=execution_params.execution_metadata.run_id
        if execution_params.execution_metadata.run_id
        else make_new_run_id(),
        selector=execution_params.selector,
        environment_dict=execution_params.environment_dict,
        mode=execution_params.mode,
        step_keys_to_execute=execution_params.step_keys,
        tags=execution_params.execution_metadata.tags,
        status=PipelineRunStatus.NOT_STARTED,
        previous_run_id=execution_params.previous_run_id,
    )
def test_fetch_by_snapshot_id(self, storage):
    assert storage
    pipeline_def_a = PipelineDefinition(name="some_pipeline", solid_defs=[])
    pipeline_def_b = PipelineDefinition(name="some_other_pipeline", solid_defs=[])
    pipeline_snapshot_a = pipeline_def_a.get_pipeline_snapshot()
    pipeline_snapshot_b = pipeline_def_b.get_pipeline_snapshot()
    pipeline_snapshot_a_id = create_pipeline_snapshot_id(pipeline_snapshot_a)
    pipeline_snapshot_b_id = create_pipeline_snapshot_id(pipeline_snapshot_b)

    assert storage.add_pipeline_snapshot(pipeline_snapshot_a) == pipeline_snapshot_a_id
    assert storage.add_pipeline_snapshot(pipeline_snapshot_b) == pipeline_snapshot_b_id

    one = make_new_run_id()
    two = make_new_run_id()
    storage.add_run(
        TestRunStorage.build_run(
            run_id=one,
            pipeline_name="some_pipeline",
            pipeline_snapshot_id=pipeline_snapshot_a_id,
        )
    )
    storage.add_run(
        TestRunStorage.build_run(
            run_id=two,
            pipeline_name="some_other_pipeline",
            pipeline_snapshot_id=pipeline_snapshot_b_id,
        )
    )

    assert len(storage.get_runs()) == 2

    runs_a = storage.get_runs(PipelineRunsFilter(snapshot_id=pipeline_snapshot_a_id))
    assert len(runs_a) == 1
    assert runs_a[0].run_id == one

    runs_b = storage.get_runs(PipelineRunsFilter(snapshot_id=pipeline_snapshot_b_id))
    assert len(runs_b) == 1
    assert runs_b[0].run_id == two
# this helper yields, so it must be decorated to be usable as `with in_pipeline_manager(...) as manager:`
@contextmanager
def in_pipeline_manager(
    pipeline_name='hello_world_pipeline',
    solid_handle=SolidHandle('hello_world', 'hello_world', None),
    handle_kwargs=None,
    mode=None,
    **kwargs
):
    manager = Manager()

    run_id = make_new_run_id()
    instance = DagsterInstance.local_temp()
    marshal_dir = tempfile.mkdtemp()

    if not handle_kwargs:
        handle_kwargs = {
            'pipeline_name': pipeline_name,
            'module_name': 'dagstermill.examples.repository',
            'fn_name': 'define_hello_world_pipeline',
        }

    pipeline_run_dict = pack_value(
        PipelineRun(
            pipeline_name=pipeline_name,
            run_id=run_id,
            mode=mode or 'default',
            environment_dict=None,
            selector=None,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )

    try:
        with safe_tempfile_path() as output_log_file_path:
            context_dict = {
                'pipeline_run_dict': pipeline_run_dict,
                'solid_handle_kwargs': solid_handle._asdict(),
                'handle_kwargs': handle_kwargs,
                'marshal_dir': marshal_dir,
                'environment_dict': {},
                'output_log_path': output_log_file_path,
                'instance_ref_dict': pack_value(instance.get_ref()),
            }

            manager.reconstitute_pipeline_context(**dict(context_dict, **kwargs))
            yield manager
    finally:
        shutil.rmtree(marshal_dir)
def test_file_system_intermediate_store_composite_types_with_custom_serializer_for_inner_type():
    run_id = make_new_run_id()
    instance = DagsterInstance.ephemeral()
    intermediate_store = build_fs_intermediate_store(
        instance.intermediates_directory, run_id=run_id
    )

    with yield_empty_pipeline_context(run_id=run_id, instance=instance) as context:
        intermediate_store.set_object(
            ['foo', 'bar'], context, resolve_dagster_type(List[LowercaseString]), ['list']
        )
        assert intermediate_store.has_object(context, ['list'])
        assert intermediate_store.get_object(
            context, resolve_dagster_type(List[Bool]), ['list']
        ).obj == ['foo', 'bar']
def test_gcs_intermediate_store_composite_types_with_custom_serializer_for_inner_type(gcs_bucket):
    run_id = make_new_run_id()

    intermediate_store = GCSIntermediateStore(run_id=run_id, gcs_bucket=gcs_bucket)

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object(
                ['foo', 'bar'], context, resolve_dagster_type(List[LowercaseString]), ['list'],
            )
            assert intermediate_store.has_object(context, ['list'])
            assert intermediate_store.get_object(
                context, resolve_dagster_type(List[Bool]), ['list']
            ).obj == ['foo', 'bar']
        finally:
            # clean up the object that was actually written: it lives at ['list'], not ['foo']
            intermediate_store.rm_object(context, ['list'])
def test_file_system_intermediate_store_composite_types():
    run_id = make_new_run_id()
    instance = DagsterInstance.ephemeral()
    intermediate_store = build_fs_intermediate_store(
        instance.intermediates_directory, run_id=run_id
    )

    with yield_empty_pipeline_context(instance=instance, run_id=run_id) as context:
        intermediate_store.set_object(
            [True, False], context, resolve_dagster_type(List[Bool]), ['bool']
        )
        assert intermediate_store.has_object(context, ['bool'])
        assert intermediate_store.get_object(
            context, resolve_dagster_type(List[Bool]), ['bool']
        ).obj == [True, False]
def test_error_dag_python():  # pylint: disable=redefined-outer-name
    pipeline_name = 'demo_error_pipeline'
    recon_repo = ReconstructableRepository.for_module('test_pipelines.repo', pipeline_name)
    environments_path = test_project_environments_path()
    environment_yaml = [
        os.path.join(environments_path, 'env_filesystem.yaml'),
    ]
    run_config = load_yaml_from_glob_list(environment_yaml)
    execution_date = timezone.utcnow()

    dag, tasks = make_airflow_dag_for_recon_repo(recon_repo, pipeline_name, run_config)

    with pytest.raises(AirflowException) as exc_info:
        execute_tasks_in_dag(dag, tasks, run_id=make_new_run_id(), execution_date=execution_date)

    assert 'Exception: Unusual error' in str(exc_info.value)
def test_synchronously_execute_run_within_hosted_user_process_not_found(self, graphql_context):
    run_id = make_new_run_id()
    result = execute_dagster_graphql(
        graphql_context,
        EXECUTE_RUN_IN_PROCESS_MUTATION,
        variables={
            "runId": run_id,
            "repositoryLocationName": main_repo_location_name(),
            "repositoryName": main_repo_name(),
        },
    )

    assert result.data
    assert result.data["executeRunInProcess"]["__typename"] == "PipelineRunNotFoundError"
def __new__(
    cls,
    run_id=None,
    tags=None,
    step_keys_to_execute=None,
    mode=None,
    previous_run_id=None,
):
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)
    tags = check.opt_dict_param(tags, 'tags', key_type=str)

    return super(RunConfig, cls).__new__(
        cls,
        run_id=check.str_param(run_id, 'run_id') if run_id else make_new_run_id(),
        tags=tags,
        step_keys_to_execute=step_keys_to_execute,
        mode=check.opt_str_param(mode, 'mode'),
        previous_run_id=check.opt_str_param(previous_run_id, 'previous_run_id'),
    )
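# A minimal usage sketch (an addition, not part of the original source; assumes RunConfig
# is the namedtuple-backed class whose __new__ is defined above): omitting run_id falls
# back to a fresh make_new_run_id(), while an explicit run_id is type-checked and preserved.
config = RunConfig(mode='default')
assert config.run_id  # auto-generated id string

reexec = RunConfig(run_id=make_new_run_id(), previous_run_id=config.run_id)
assert reexec.previous_run_id == config.run_id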
def test_s3_intermediate_store_with_composite_type_storage_plugin(s3_bucket):
    run_id = make_new_run_id()

    intermediate_store = S3IntermediateStore(
        run_id=run_id,
        s3_bucket=s3_bucket,
        type_storage_plugin_registry=TypeStoragePluginRegistry(
            [(RuntimeString, FancyStringS3TypeStoragePlugin)]
        ),
    )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_store.set_value(
                ['hello'], context, resolve_dagster_type(List[String]), ['obj_name']
            )
def test_file_system_intermediate_store_with_custom_serializer():
    run_id = make_new_run_id()
    instance = DagsterInstance.ephemeral()
    intermediate_store = build_fs_intermediate_store(
        instance.intermediates_directory, run_id=run_id
    )

    with yield_empty_pipeline_context(run_id=run_id, instance=instance) as context:
        intermediate_store.set_object('foo', context, LowercaseString, ['foo'])

        with open(os.path.join(intermediate_store.root, 'foo'), 'rb') as fd:
            assert fd.read().decode('utf-8') == 'FOO'

        assert intermediate_store.has_object(context, ['foo'])
        assert intermediate_store.get_object(context, LowercaseString, ['foo']).obj == 'foo'
def pipeline_run_args_from_execution_params(execution_params, step_keys_to_execute=None):
    check.inst_param(execution_params, 'execution_params', ExecutionParams)
    return dict(
        pipeline_name=execution_params.selector.name,
        run_id=execution_params.execution_metadata.run_id
        if execution_params.execution_metadata.run_id
        else make_new_run_id(),
        selector=execution_params.selector,
        environment_dict=execution_params.environment_dict,
        mode=execution_params.mode,
        step_keys_to_execute=step_keys_to_execute or execution_params.step_keys,
        tags=execution_params.execution_metadata.tags,
        root_run_id=execution_params.execution_metadata.root_run_id,
        parent_run_id=execution_params.execution_metadata.parent_run_id,
        status=PipelineRunStatus.NOT_STARTED,
    )
def test_basic_storage(self, storage):
    assert storage
    run_id = make_new_run_id()
    added = storage.add_run(
        TestRunStorage.build_run(run_id=run_id, pipeline_name='some_pipeline')
    )
    assert added
    runs = storage.get_runs()
    assert len(runs) == 1
    run = runs[0]
    assert run.run_id == run_id
    assert run.pipeline_name == 'some_pipeline'
    assert storage.has_run(run_id)
    fetched_run = storage.get_run_by_id(run_id)
    assert fetched_run.run_id == run_id
    assert fetched_run.pipeline_name == 'some_pipeline'
def test_synchronously_execute_run_within_hosted_user_process_not_found(self, graphql_context):
    run_id = make_new_run_id()
    result = execute_dagster_graphql(
        graphql_context,
        EXECUTE_RUN_IN_PROCESS_QUERY,
        variables={
            'runId': run_id,
            'repositoryLocationName': main_repo_location_name(),
            'repositoryName': main_repo_name(),
        },
    )

    assert result.data
    assert result.data['executeRunInProcess']['__typename'] == 'PipelineRunNotFoundError'
def test_spark_data_frame_serialization_file_system_file_handle(spark_config):
    @solid
    def nonce(_):
        return LocalFileHandle(file_relative_path(__file__, 'data/test.csv'))

    @pipeline(mode_defs=[spark_mode])
    def spark_df_test_pipeline():
        ingest_csv_file_handle_to_spark(nonce())

    run_id = make_new_run_id()
    instance = DagsterInstance.ephemeral()
    intermediate_store = build_fs_intermediate_store(
        instance.intermediates_directory, run_id=run_id
    )

    result = execute_pipeline(
        spark_df_test_pipeline,
        run_config=RunConfig(run_id=run_id, mode='spark'),
        environment_dict={
            'storage': {'filesystem': {}},
            'resources': {'spark': {'config': {'spark_conf': spark_config}}},
        },
        instance=instance,
    )

    assert result.success
    result_dir = os.path.join(
        intermediate_store.root,
        'intermediates',
        'ingest_csv_file_handle_to_spark.compute',
        'result',
    )

    assert '_SUCCESS' in os.listdir(result_dir)

    spark = SparkSession.builder.getOrCreate()
    df = spark.read.parquet(result_dir)
    assert isinstance(df, pyspark.sql.dataframe.DataFrame)
    assert df.head()[0] == '1'
def test_run_record_stats(self, storage):
    assert storage
    self._skip_in_memory(storage)

    run_id = make_new_run_id()
    run_to_add = TestRunStorage.build_run(pipeline_name="pipeline_name", run_id=run_id)

    storage.add_run(run_to_add)

    run_record = storage.get_run_records(PipelineRunsFilter(run_ids=[run_id]))[0]
    assert run_record.start_time is None
    assert run_record.end_time is None

    storage.handle_run_event(
        run_id,
        DagsterEvent(
            message="a message",
            event_type_value=DagsterEventType.PIPELINE_START.value,
            pipeline_name="pipeline_name",
        ),
    )

    run_record = storage.get_run_records(PipelineRunsFilter(run_ids=[run_id]))[0]
    assert run_record.start_time is not None
    assert run_record.end_time is None

    storage.handle_run_event(
        run_id,
        DagsterEvent(
            message="a message",
            event_type_value=DagsterEventType.PIPELINE_SUCCESS.value,
            pipeline_name="pipeline_name",
        ),
    )

    run_record = storage.get_run_records(PipelineRunsFilter(run_ids=[run_id]))[0]
    assert run_record.start_time is not None
    assert run_record.end_time is not None
    assert run_record.end_time >= run_record.start_time
def start_pipeline_execution(graphene_info, execution_params, reexecution_config):
    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    check.inst_param(execution_params, 'execution_params', ExecutionParams)
    check.opt_inst_param(reexecution_config, 'reexecution_config', ReexecutionConfig)

    pipeline_run_storage = graphene_info.context.pipeline_runs

    dauphin_pipeline = get_dauphin_pipeline_from_selector(graphene_info, execution_params.selector)

    execution_plan = create_execution_plan(
        dauphin_pipeline.get_dagster_pipeline(),
        get_validated_config(
            graphene_info,
            dauphin_pipeline,
            environment_dict=execution_params.environment_dict,
            mode=execution_params.mode,
        ).value,
        mode=execution_params.mode,
    )

    _check_start_pipeline_execution_errors(
        graphene_info, execution_params, execution_plan, reexecution_config
    )

    run = pipeline_run_storage.create_run(
        run_id=execution_params.execution_metadata.run_id
        if execution_params.execution_metadata.run_id
        else make_new_run_id(),
        selector=execution_params.selector,
        env_config=execution_params.environment_dict,
        mode=execution_params.mode,
        execution_plan=execution_plan,
        reexecution_config=reexecution_config,
        step_keys_to_execute=execution_params.step_keys,
    )
    pipeline_run_storage.add_run(run)

    graphene_info.context.execution_manager.execute_pipeline(
        graphene_info.context.get_handle(),
        dauphin_pipeline.get_dagster_pipeline(),
        run,
        raise_on_error=graphene_info.context.raise_on_error,
    )

    return graphene_info.schema.type_named('StartPipelineExecutionSuccess')(
        run=graphene_info.schema.type_named('PipelineRun')(run)
    )
def _prepare_message(self, orig_message, message_props):
    check.str_param(orig_message, 'orig_message')
    check.dict_param(message_props, 'message_props')

    # These are todos to further align with the Python logging API
    check.invariant(
        'extra' not in message_props, 'do not allow until explicit support is handled'
    )
    check.invariant(
        'exc_info' not in message_props, 'do not allow until explicit support is handled'
    )

    # Reserved keys in the message_props -- these are system generated.
    check.invariant('orig_message' not in message_props, 'orig_message reserved value')
    check.invariant('message' not in message_props, 'message reserved value')
    check.invariant('log_message_id' not in message_props, 'log_message_id reserved value')
    check.invariant('log_timestamp' not in message_props, 'log_timestamp reserved value')

    log_message_id = make_new_run_id()

    log_timestamp = datetime.datetime.utcnow().isoformat()

    synth_props = {
        'orig_message': orig_message,
        'log_message_id': log_message_id,
        'log_timestamp': log_timestamp,
        'run_id': self.run_id,
    }

    # We first generate all props for the purpose of producing the semi-structured
    # log message via _kv_message
    all_props = dict(
        itertools.chain(synth_props.items(), self.logging_tags.items(), message_props.items())
    )

    # So here we use the arbitrary key DAGSTER_META_KEY to store a dictionary of
    # all the meta information that dagster injects into log message.
    # The python logging module, in its infinite wisdom, actually takes all the
    # keys in extra and unconditionally smashes them into the internal dictionary
    # of the logging.LogRecord class. We use a reserved key here to avoid naming
    # collisions with internal variables of the LogRecord class.
    # See __init__.py:363 (makeLogRecord) in the python 3.6 logging module source
    # for the gory details.
    return (
        construct_log_string(synth_props, self.logging_tags, message_props),
        {DAGSTER_META_KEY: all_props},
    )
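# A small self-contained illustration (an addition, not part of the module above) of the
# LogRecord behavior that the comment in _prepare_message describes: the stdlib logging
# module copies arbitrary keys straight onto the LogRecord's __dict__, which is why a
# single reserved key (DAGSTER_META_KEY) is used to avoid clobbering LogRecord internals.
import logging

record = logging.makeLogRecord({'custom_key': {'run_id': 'abc'}})
assert record.custom_key == {'run_id': 'abc'}  # arbitrary keys become record attributes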