def test_multiprocessing_execution_for_composite_solid():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid': {
                'solids': {'node_a': {'config': {'foo': 'baz'}}, 'node_b': {'config': {'bar': 3}}}
            }
        }
    }

    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'composite_pipeline')
    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=composite_pipeline.name,
            run_id=run_id,
            selector=ExecutionSelector('nonce'),
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, composite_pipeline, pipeline_run, instance)
    execution_manager.join()

    assert instance.get_run_by_id(run_id).status == PipelineRunStatus.SUCCESS

    # Re-run with the multiprocess executor, which requires persistent storage
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid': {
                'solids': {'node_a': {'config': {'foo': 'baz'}}, 'node_b': {'config': {'bar': 3}}}
            }
        },
        'execution': {'multiprocess': {}},
        'storage': {'filesystem': {}},
    }

    run_id = make_new_run_id()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=composite_pipeline.name,
            run_id=run_id,
            selector=ExecutionSelector('nonce'),
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, composite_pipeline, pipeline_run, instance)
    execution_manager.join()

    assert instance.get_run_by_id(run_id).status == PipelineRunStatus.SUCCESS
def execute_partition_set(partition_set, partition_filter, instance=None):
    check.inst_param(partition_set, 'partition_set', PartitionSetDefinition)
    check.callable_param(partition_filter, 'partition_filter')
    # instance may be None, so use the opt_ variant; we fall back to an ephemeral instance below
    check.opt_inst_param(instance, 'instance', DagsterInstance)

    candidate_partitions = partition_set.get_partitions()
    partitions = partition_filter(candidate_partitions)

    instance = instance or DagsterInstance.ephemeral()

    for partition in partitions:
        run = PipelineRun(
            pipeline_name=partition_set.pipeline_name,
            run_id=make_new_run_id(),
            selector=ExecutionSelector(partition_set.pipeline_name),
            environment_dict=partition_set.environment_dict_for_partition(partition),
            mode='default',
            tags=merge_dicts(
                {'dagster/backfill': 'custom'}, partition_set.tags_for_partition(partition)
            ),
            status=PipelineRunStatus.NOT_STARTED,
        )

        # Remove once we can handle synchronous execution... currently limited by sqlite
        time.sleep(0.1)
        instance.run_launcher.launch_run(run)
def test_execution_crash():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'crashy_pipeline')
    environment_dict = {
        'solids': {'sum_solid': {'inputs': {'num': file_relative_path(__file__, 'data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=crashy_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, crashy_pipeline, pipeline_run, instance)
    execution_manager.join()

    assert instance.get_run_by_id(run_id).status == PipelineRunStatus.FAILURE

    last_log = instance.all_logs(run_id)[-1]

    assert last_log.message.startswith(
        'Exception: Pipeline execution process for {run_id} unexpectedly exited\n'.format(
            run_id=run_id
        )
    )
def pipeline_launch_command(env, preset_name, mode, **kwargs):
    env = list(check.opt_tuple_param(env, 'env', default=(), of_type=str))

    pipeline = create_pipeline_from_cli_args(kwargs)
    instance = DagsterInstance.get()

    if preset_name:
        if env:
            raise click.UsageError('Can not use --preset with --env.')
        if mode:
            raise click.UsageError('Can not use --preset with --mode.')
        preset = pipeline.get_preset(preset_name)
    else:
        preset = None

    run_tags = get_tags_from_args(kwargs)

    run = PipelineRun(
        pipeline_name=pipeline.name,
        run_id=make_new_run_id(),
        selector=ExecutionSelector(pipeline.name, preset.solid_subset if preset else None),
        environment_dict=preset.environment_dict if preset else load_yaml_from_glob_list(env),
        mode=(preset.mode if preset else mode) or 'default',
        status=PipelineRunStatus.NOT_STARTED,
        tags=run_tags,
    )

    return instance.launch_run(run)
def test_roundtrip_run():
    run = PipelineRun(
        pipeline_name='pipey_mcpipeface',
        run_id='8675309',
        environment_dict={'good': True},
        mode='default',
        selector=ExecutionSelector('pipey_mcpipeface'),
        step_keys_to_execute=['step_1', 'step_2', 'step_3'],
        tags={'tag_it': 'bag_it'},
        status=PipelineRunStatus.NOT_STARTED,
        previous_run_id='previousID',
    )
    for field in run:
        # ensure we have a test value to round trip for each field
        assert field

    exec_params = execution_params_from_pipeline_run(run)
    assert run == pipeline_run_from_execution_params(exec_params)

    exec_params_gql = execution_params_from_graphql(exec_params.to_graphql_input())
    assert exec_params_gql == exec_params
    assert run == pipeline_run_from_execution_params(exec_params_gql)

    empty_run = PipelineRun.create_empty_run('foo', 'bar')
    exec_params = execution_params_from_pipeline_run(empty_run)
    assert empty_run == pipeline_run_from_execution_params(exec_params)

    exec_params_gql = execution_params_from_graphql(exec_params.to_graphql_input())
    assert exec_params_gql == exec_params
    assert empty_run == pipeline_run_from_execution_params(exec_params_gql)
def python_callable(ts, dag_run, **kwargs):  # pylint: disable=unused-argument
    # `mode`, `pipeline_name`, `step_keys`, `environment_dict`, `handle`, and
    # `instance_ref` are captured from the enclosing scope.
    run_id = dag_run.run_id
    # TODO: https://github.com/dagster-io/dagster/issues/1342
    redacted = construct_variables(mode, 'REDACTED', pipeline_name, run_id, ts, step_keys)
    logging.info(
        'Executing GraphQL query: {query}\n'.format(query=EXECUTE_PLAN_MUTATION)
        + 'with variables:\n'
        + seven.json.dumps(redacted, indent=2)
    )
    instance = DagsterInstance.from_ref(instance_ref) if instance_ref else None
    if instance:
        instance.get_or_create_run(
            PipelineRun(
                pipeline_name=pipeline_name,
                run_id=run_id,
                environment_dict=environment_dict,
                mode=mode,
                selector=ExecutionSelector(pipeline_name),
                reexecution_config=None,
                step_keys_to_execute=None,
                tags=None,
                status=PipelineRunStatus.MANAGED,
            )
        )

    events = execute_execute_plan_mutation(
        handle,
        construct_variables(mode, environment_dict, pipeline_name, run_id, ts, step_keys),
        instance_ref=instance_ref,
    )
    check_events_for_skips(events)
    return events
def test_running():
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'passing_pipeline')
    environment_dict = {
        'solids': {'sum_solid': {'inputs': {'num': file_relative_path(__file__, 'data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline=passing_pipeline,
        selector=selector,
        environment_dict=environment_dict,
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, passing_pipeline, pipeline_run, instance)
    execution_manager.join()

    assert instance.get_run_by_id(pipeline_run.run_id).status == PipelineRunStatus.SUCCESS

    events = instance.all_logs(pipeline_run.run_id)
    assert events

    engine_events = get_events_of_type(events, DagsterEventType.ENGINE_EVENT)
    assert (
        len([ev for ev in engine_events if 'SubprocessExecutionManager' in ev.message]) == 3
    )  # starting, started, exit
def test_execution_crash():
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'crashy_pipeline')
    environment_dict = {
        'solids': {'sum_solid': {'inputs': {'num': file_relative_path(__file__, 'data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline=crashy_pipeline,
        selector=selector,
        environment_dict=environment_dict,
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, crashy_pipeline, pipeline_run, instance)
    execution_manager.join()

    assert instance.get_run_by_id(pipeline_run.run_id).status == PipelineRunStatus.FAILURE

    # last message is pipeline failure, second to last is...
    crash_log = instance.all_logs(pipeline_run.run_id)[-2]

    assert crash_log.message.startswith(
        '[SubprocessExecutionManager] Pipeline execution process for {run_id} unexpectedly exited'.format(
            run_id=pipeline_run.run_id
        )
    )
def test_failing():
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'failing_pipeline')
    environment_dict = {
        'solids': {'sum_solid': {'inputs': {'num': file_relative_path(__file__, 'data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline=failing_pipeline,
        selector=selector,
        environment_dict=environment_dict,
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, failing_pipeline, pipeline_run, instance)
    execution_manager.join()

    assert instance.get_run_by_id(pipeline_run.run_id).status == PipelineRunStatus.FAILURE
    assert instance.all_logs(pipeline_run.run_id)
def __init__(
    self,
    name,
    cron_schedule,
    pipeline_name,
    environment_dict=None,
    environment_dict_fn=None,
    tags=None,
    tags_fn=None,
    solid_subset=None,
    mode='default',
    should_execute=None,
    environment_vars=None,
):
    check.str_param(name, 'name')
    check.str_param(cron_schedule, 'cron_schedule')
    check.str_param(pipeline_name, 'pipeline_name')
    check.opt_dict_param(environment_dict, 'environment_dict')
    check.opt_callable_param(environment_dict_fn, 'environment_dict_fn')
    check.opt_dict_param(tags, 'tags', key_type=str, value_type=str)
    check.opt_callable_param(tags_fn, 'tags_fn')
    check.opt_nullable_list_param(solid_subset, 'solid_subset', of_type=str)
    mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)
    check.opt_callable_param(should_execute, 'should_execute')
    check.opt_dict_param(environment_vars, 'environment_vars', key_type=str, value_type=str)

    if environment_dict_fn and environment_dict:
        raise DagsterInvalidDefinitionError(
            'Attempted to provide both environment_dict_fn and environment_dict as arguments'
            ' to ScheduleDefinition. Must provide only one of the two.'
        )

    if tags_fn and tags:
        raise DagsterInvalidDefinitionError(
            'Attempted to provide both tags_fn and tags as arguments'
            ' to ScheduleDefinition. Must provide only one of the two.'
        )

    if not environment_dict and not environment_dict_fn:
        environment_dict_fn = lambda _context: {}

    if not tags and not tags_fn:
        tags_fn = lambda _context: {}

    if not should_execute:
        should_execute = lambda _context: True

    self._schedule_definition_data = ScheduleDefinitionData(
        name=check.str_param(name, 'name'),
        cron_schedule=check.str_param(cron_schedule, 'cron_schedule'),
        environment_vars=check.opt_dict_param(environment_vars, 'environment_vars'),
    )

    self._environment_dict = environment_dict
    self._environment_dict_fn = environment_dict_fn
    self._tags = tags
    self._tags_fn = tags_fn
    self._should_execute = should_execute
    self._mode = mode
    self._selector = ExecutionSelector(pipeline_name, solid_subset)
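# Hypothetical usage sketch (not part of the original source): constructing a
# daily schedule with this constructor. The pipeline name, tag values, and
# environment_dict shape are illustrative assumptions; note that only one of
# environment_dict / environment_dict_fn (and tags / tags_fn) may be supplied.
daily_schedule = ScheduleDefinition(
    name='daily_my_pipeline',
    cron_schedule='0 0 * * *',  # midnight every day
    pipeline_name='my_pipeline',
    environment_dict={'storage': {'filesystem': {}}},
    tags={'team': 'data'},
)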
def execute_remote_pipeline_run(
    host, pipeline_name, environment_dict=None, tags=None, solid_subset=None, mode=None
):
    check.str_param(host, 'host')
    check.str_param(pipeline_name, 'pipeline_name')
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict', key_type=str)
    check.opt_dict_param(tags, 'tags', key_type=str, value_type=str)
    check.opt_nullable_list_param(solid_subset, 'solid_subset', of_type=str)
    mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)

    selector = ExecutionSelector(pipeline_name, solid_subset)
    execution_params = ExecutionParams(
        selector=selector,
        environment_dict=environment_dict,
        mode=mode,
        execution_metadata=ExecutionMetadata(run_id=None, tags=tags or {}),
        step_keys=None,
        previous_run_id=None,
    )

    result = execute_query_against_remote(
        host,
        START_PIPELINE_EXECUTION_MUTATION,
        variables=json.dumps({'executionParams': execution_params.to_graphql_input()}),
    )

    return result
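# Hypothetical usage sketch (not part of the original source): kicking off a
# remote run against a dagit host. The host URL, pipeline name, and config
# values are illustrative assumptions.
result = execute_remote_pipeline_run(
    host='http://localhost:3000',
    pipeline_name='my_pipeline',
    environment_dict={'solids': {'my_solid': {'config': {'param': 1}}}},
    mode='default',
)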
def test_failing():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'failing_pipeline')
    environment_dict = {
        'solids': {'sum_solid': {'inputs': {'num': file_relative_path(__file__, 'data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=failing_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, failing_pipeline, pipeline_run, instance)
    execution_manager.join()

    assert instance.get_run_by_id(run_id).status == PipelineRunStatus.FAILURE
    assert instance.all_logs(run_id)
def start_scheduled_execution(graphene_info, schedule_name):
    from dagster_graphql.schema.roots import create_execution_metadata

    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    check.str_param(schedule_name, 'schedule_name')

    schedule = get_dagster_schedule(graphene_info, schedule_name)
    schedule_def = get_dagster_schedule_def(graphene_info, schedule_name)

    schedule_context = ScheduleExecutionContext(graphene_info.context.instance)

    # Run should_execute and halt if it returns False
    if not schedule_def.should_execute(schedule_context):
        return graphene_info.schema.type_named('ScheduledExecutionBlocked')(
            message='Schedule {schedule_name} did not run because the should_execute did not return'
            ' True'.format(schedule_name=schedule_name)
        )

    # Get environment_dict
    environment_dict = schedule_def.get_environment_dict(schedule_context)
    tags = schedule_def.get_tags(schedule_context)

    check.invariant('dagster/schedule_id' not in tags)
    tags['dagster/schedule_id'] = schedule.schedule_id

    check.invariant('dagster/schedule_name' not in tags)
    tags['dagster/schedule_name'] = schedule_def.name

    execution_metadata_tags = [{'key': key, 'value': value} for key, value in tags.items()]
    execution_params = merge_dicts(
        schedule_def.execution_params, {'executionMetadata': {'tags': execution_metadata_tags}}
    )

    selector = ExecutionSelector(
        execution_params['selector']['name'], execution_params['selector'].get('solidSubset')
    )

    execution_params = ExecutionParams(
        selector=selector,
        environment_dict=environment_dict,
        mode=execution_params.get('mode'),
        execution_metadata=create_execution_metadata(execution_params.get('executionMetadata')),
        step_keys=execution_params.get('stepKeys'),
        previous_run_id=None,
    )

    # Launch run if run launcher is defined
    run_launcher = graphene_info.context.instance.run_launcher
    if run_launcher:
        return launch_pipeline_execution(graphene_info, execution_params)

    return start_pipeline_execution(graphene_info, execution_params)
def start_scheduled_execution(graphene_info, schedule_name):
    from dagster_graphql.schema.roots import create_execution_metadata

    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    check.str_param(schedule_name, 'schedule_name')

    schedule = get_dagster_schedule(graphene_info, schedule_name)
    schedule_def = get_dagster_schedule_def(graphene_info, schedule_name)

    # Run should_execute and halt if it returns False
    should_execute = schedule_def.should_execute
    if not should_execute():
        return graphene_info.schema.type_named('ScheduledExecutionBlocked')(
            message='Schedule {schedule_name} did not run because the should_execute did not return '
            'True'.format(schedule_name=schedule_name)
        )

    # Add dagster/schedule_id tag to executionMetadata
    execution_params = merge_dicts(
        {'executionMetadata': {'tags': []}}, schedule_def.execution_params
    )

    # Check that the dagster/schedule_id tag is not already set
    check.invariant(
        not any(
            tag['key'] == 'dagster/schedule_id'
            for tag in execution_params['executionMetadata']['tags']
        ),
        'Tag dagster/schedule_id is already defined in executionMetadata.tags',
    )

    # Check that the dagster/schedule_name tag is not already set
    check.invariant(
        not any(
            tag['key'] == 'dagster/schedule_name'
            for tag in execution_params['executionMetadata']['tags']
        ),
        'Tag dagster/schedule_name is already defined in executionMetadata.tags',
    )

    execution_params['executionMetadata']['tags'].append(
        {'key': 'dagster/schedule_id', 'value': schedule.schedule_id}
    )
    execution_params['executionMetadata']['tags'].append(
        {'key': 'dagster/schedule_name', 'value': schedule.name}
    )

    selector = execution_params['selector']
    execution_params = ExecutionParams(
        selector=ExecutionSelector(selector['name'], selector.get('solidSubset')),
        environment_dict=execution_params.get('environmentConfigData'),
        mode=execution_params.get('mode'),
        execution_metadata=create_execution_metadata(execution_params.get('executionMetadata')),
        step_keys=execution_params.get('stepKeys'),
        previous_run_id=None,
    )

    return start_pipeline_execution(graphene_info, execution_params)
def __new__(
    cls,
    pipeline_name,
    run_id,
    environment_dict,
    mode,
    selector=None,
    step_keys_to_execute=None,
    status=None,
    tags=None,
    root_run_id=None,
    parent_run_id=None,
    pipeline_snapshot_id=None,
    ## GRAVEYARD BELOW
    # see https://github.com/dagster-io/dagster/issues/2372 for explanation
    previous_run_id=None,
):
    from dagster.core.definitions.pipeline import ExecutionSelector

    root_run_id = check.opt_str_param(root_run_id, 'root_run_id')
    parent_run_id = check.opt_str_param(parent_run_id, 'parent_run_id')

    check.invariant(
        (root_run_id is not None and parent_run_id is not None)
        or (root_run_id is None and parent_run_id is None),
        (
            'Must set both root_run_id and parent_run_id when creating a PipelineRun that '
            'belongs to a run group'
        ),
    )

    # Historical runs may have previous_run_id set, in which case
    # that previous ID becomes both the root and the parent
    if previous_run_id:
        if not (parent_run_id and root_run_id):
            parent_run_id = previous_run_id
            root_run_id = previous_run_id

    return super(PipelineRun, cls).__new__(
        cls,
        pipeline_name=check.str_param(pipeline_name, 'pipeline_name'),
        run_id=check.str_param(run_id, 'run_id'),
        environment_dict=check.opt_dict_param(environment_dict, 'environment_dict', key_type=str),
        mode=check.str_param(mode, 'mode'),
        selector=check.opt_inst_param(
            selector, 'selector', ExecutionSelector, ExecutionSelector(pipeline_name)
        ),
        step_keys_to_execute=None
        if step_keys_to_execute is None
        else check.list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str),
        status=check.opt_inst_param(
            status, 'status', PipelineRunStatus, PipelineRunStatus.NOT_STARTED
        ),
        tags=check.opt_dict_param(tags, 'tags', key_type=str),
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
        pipeline_snapshot_id=check.opt_str_param(pipeline_snapshot_id, 'pipeline_snapshot_id'),
    )
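# Hypothetical usage sketch (not part of the original source): a re-executed
# run pointing back at its ancestors. The pipeline name and run IDs are
# illustrative assumptions; per the invariant above, root_run_id and
# parent_run_id must be set together.
retry_run = PipelineRun(
    pipeline_name='my_pipeline',
    run_id=make_new_run_id(),
    environment_dict={},
    mode='default',
    root_run_id='original-run-id',
    parent_run_id='original-run-id',
)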
def get_runtime_type(graphene_info, pipeline_name, type_name):
    pipeline = get_dagster_pipeline_from_selector(graphene_info, ExecutionSelector(pipeline_name))

    if not pipeline.has_runtime_type(type_name):
        raise UserFacingGraphQLError(
            graphene_info.schema.type_named('RuntimeTypeNotFoundError')(
                pipeline=pipeline, runtime_type_name=type_name
            )
        )

    return to_dauphin_runtime_type(pipeline.runtime_type_named(type_name))
def create_empty_run(pipeline_name, run_id, environment_dict=None, tags=None):
    from dagster.core.definitions.pipeline import ExecutionSelector

    return PipelineRun(
        pipeline_name=pipeline_name,
        run_id=run_id,
        environment_dict=environment_dict,
        mode='default',
        selector=ExecutionSelector(pipeline_name),
        step_keys_to_execute=None,
        tags=tags,
        status=PipelineRunStatus.NOT_STARTED,
    )
def create_run_for_pipeline(
    self,
    pipeline,
    execution_plan=None,
    run_id=None,
    environment_dict=None,
    mode=None,
    selector=None,
    step_keys_to_execute=None,
    status=None,
    tags=None,
    root_run_id=None,
    parent_run_id=None,
):
    from dagster.core.execution.api import create_execution_plan
    from dagster.core.execution.plan.plan import ExecutionPlan
    from dagster.core.snap.execution_plan_snapshot import snapshot_from_execution_plan

    check.inst_param(pipeline, 'pipeline', PipelineDefinition)
    check.opt_inst_param(execution_plan, 'execution_plan', ExecutionPlan)

    if execution_plan is None:
        execution_plan = create_execution_plan(
            pipeline,
            environment_dict=environment_dict,
            mode=mode,
            step_keys_to_execute=step_keys_to_execute,
        )

    return self.get_or_create_run(
        pipeline_name=pipeline.name,
        run_id=run_id,
        environment_dict=environment_dict,
        mode=check.opt_str_param(mode, 'mode', default=pipeline.get_default_mode_name()),
        selector=check.opt_inst_param(
            selector,
            'selector',
            ExecutionSelector,
            default=ExecutionSelector(name=pipeline.name),
        ),
        step_keys_to_execute=step_keys_to_execute,
        status=status,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
        pipeline_snapshot=pipeline.get_pipeline_snapshot(),
        execution_plan_snapshot=snapshot_from_execution_plan(
            execution_plan, pipeline.get_pipeline_snapshot_id()
        ),
    )
def build_run(
    run_id, pipeline_name, mode='default', tags=None, status=PipelineRunStatus.NOT_STARTED
):
    return PipelineRun(
        pipeline_name=pipeline_name,
        run_id=run_id,
        environment_dict=None,
        mode=mode,
        selector=ExecutionSelector(pipeline_name),
        reexecution_config=None,
        step_keys_to_execute=None,
        tags=tags,
        status=status,
    )
def get_dagster_type(graphene_info, pipeline_name, type_name):
    dauphin_pipeline = get_dauphin_pipeline_from_selector(
        graphene_info, ExecutionSelector(pipeline_name)
    )
    pipeline_index = dauphin_pipeline.get_pipeline_index()

    if not pipeline_index.has_dagster_type_name(type_name):
        raise UserFacingGraphQLError(
            graphene_info.schema.type_named('RuntimeTypeNotFoundError')(
                pipeline=dauphin_pipeline, runtime_type_name=type_name
            )
        )

    return to_dauphin_dagster_type(
        pipeline_index.pipeline_snapshot,
        pipeline_index.get_dagster_type_from_name(type_name).key,
    )
def test_running():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'passing_pipeline')
    environment_dict = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=passing_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(
        handle, passing_pipeline, pipeline_run, instance, raise_on_error=False
    )
    execution_manager.join()

    assert instance.get_run(run_id).status == PipelineRunStatus.SUCCESS

    events = instance.all_logs(run_id)
    assert events

    process_start_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_START)
    assert len(process_start_events) == 1

    process_started_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_STARTED)
    assert len(process_started_events) == 1

    process_exited_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_EXITED)
    assert len(process_exited_events) == 1
def create_execution_params(graphene_info, graphql_execution_params):
    preset_name = graphql_execution_params.get('preset')
    if preset_name:
        check.invariant(
            not graphql_execution_params.get('environmentConfigData'),
            'Invalid ExecutionParams. Cannot define environment_dict when using preset',
        )
        check.invariant(
            not graphql_execution_params.get('mode'),
            'Invalid ExecutionParams. Cannot define mode when using preset',
        )

        selector = graphql_execution_params['selector'].to_selector()
        check.invariant(
            not selector.solid_subset,
            'Invalid ExecutionParams. Cannot define selector.solid_subset when using preset',
        )
        dauphin_pipeline = get_dauphin_pipeline_reference_from_selector(graphene_info, selector)
        pipeline = dauphin_pipeline.get_dagster_pipeline()

        if not pipeline.has_preset(preset_name):
            raise UserFacingGraphQLError(
                graphene_info.schema.type_named('PresetNotFoundError')(
                    preset=preset_name, selector=selector
                )
            )

        preset = pipeline.get_preset(preset_name)

        return ExecutionParams(
            selector=ExecutionSelector(selector.name, preset.solid_subset),
            environment_dict=preset.environment_dict,
            mode=preset.mode,
            execution_metadata=ExecutionMetadata(run_id=None, tags={}),
            step_keys=graphql_execution_params.get('stepKeys'),
            previous_run_id=graphql_execution_params.get('retryRunId'),
        )

    return ExecutionParams(
        selector=graphql_execution_params['selector'].to_selector(),
        environment_dict=graphql_execution_params.get('environmentConfigData'),
        mode=graphql_execution_params.get('mode'),
        execution_metadata=create_execution_metadata(
            graphql_execution_params.get('executionMetadata')
        ),
        step_keys=graphql_execution_params.get('stepKeys'),
        previous_run_id=graphql_execution_params.get('retryRunId'),
    )
def get_config_type(graphene_info, pipeline_name, config_type_name, mode):
    check.str_param(pipeline_name, 'pipeline_name')
    check.str_param(config_type_name, 'config_type_name')
    check.opt_str_param(mode, 'mode')

    pipeline = get_dagster_pipeline_from_selector(graphene_info, ExecutionSelector(pipeline_name))
    environment_schema = create_environment_schema(pipeline, mode)

    if not environment_schema.has_config_type(config_type_name):
        raise UserFacingGraphQLError(
            graphene_info.schema.type_named('ConfigTypeNotFoundError')(
                pipeline=pipeline, config_type_name=config_type_name
            )
        )

    return to_dauphin_config_type(environment_schema.config_type_named(config_type_name))
def _get_partition_sets(graphene_info, pipeline_name):
    partition_sets = graphene_info.context.get_all_partition_sets()

    if pipeline_name:
        pipeline_def = get_pipeline_def_from_selector(
            graphene_info, ExecutionSelector(pipeline_name)
        )
        matching_partition_sets = filter(
            lambda partition_set: partition_set.pipeline_name == pipeline_def.name,
            partition_sets,
        )
    else:
        matching_partition_sets = partition_sets

    return [
        graphene_info.schema.type_named('PartitionSet')(partition_set)
        for partition_set in matching_partition_sets
    ]
def test_roundtrip_run():
    run_with_snapshot = PipelineRun(
        pipeline_name='pipey_mcpipeface',
        run_id='8675309',
        environment_dict={'good': True},
        mode='default',
        selector=ExecutionSelector('pipey_mcpipeface'),
        step_keys_to_execute=['step_1', 'step_2', 'step_3'],
        tags={'tag_it': 'bag_it'},
        status=PipelineRunStatus.NOT_STARTED,
        root_run_id='previousID',
        parent_run_id='previousID',
        pipeline_snapshot_id='pipey_mcpipeface_snapshot_id',
        execution_plan_snapshot_id='mcexecutionplanface_snapshot_id',
    )
    for field in run_with_snapshot:
        # ensure we have a test value to round trip for each field
        assert field

    # The invariant that all the execution parameter structs and the pipeline
    # run can be constructed from each other is no longer true. Clients of the
    # GraphQL API cannot know the value of the pipeline_snapshot_id prior to
    # execution, because it is constructed on the server. Hence these
    # roundtrip tests do not include the snapshot ids.
    run = run_with_snapshot._replace(pipeline_snapshot_id=None, execution_plan_snapshot_id=None)

    exec_params = execution_params_from_pipeline_run(run)
    for key, value in pipeline_run_args_from_execution_params(exec_params).items():
        assert getattr(run, key) == value

    exec_params_gql = execution_params_from_graphql(exec_params.to_graphql_input())
    assert exec_params_gql == exec_params
    for key, value in pipeline_run_args_from_execution_params(exec_params_gql).items():
        assert getattr(run, key) == value

    empty_run = PipelineRun(pipeline_name='foo', run_id='bar', mode='default')
    exec_params = execution_params_from_pipeline_run(empty_run)
    for key, value in pipeline_run_args_from_execution_params(exec_params).items():
        assert getattr(empty_run, key) == value

    exec_params_gql = execution_params_from_graphql(exec_params.to_graphql_input())
    assert exec_params_gql == exec_params
    for key, value in pipeline_run_args_from_execution_params(exec_params_gql).items():
        assert getattr(empty_run, key) == value
def test_roundtrip_run():
    run = PipelineRun(
        pipeline_name='pipey_mcpipeface',
        run_id='8675309',
        environment_dict={'good': True},
        mode='default',
        selector=ExecutionSelector('pipey_mcpipeface'),
        step_keys_to_execute=['step_1', 'step_2', 'step_3'],
        tags={'tag_it': 'bag_it'},
        status=PipelineRunStatus.NOT_STARTED,
    )
    assert run == pipeline_run_from_execution_params(execution_params_from_pipeline_run(run))

    empty_run = PipelineRun.create_empty_run('foo', 'bar')
    assert empty_run == pipeline_run_from_execution_params(
        execution_params_from_pipeline_run(empty_run)
    )
def __new__(
    cls,
    pipeline_name,
    run_id,
    environment_dict,
    mode,
    selector=None,
    step_keys_to_execute=None,
    status=None,
    tags=None,
    root_run_id=None,
    parent_run_id=None,
    previous_run_id=None,
    pipeline_snapshot_id=None,
):
    from dagster.core.definitions.pipeline import ExecutionSelector

    tags = check.opt_dict_param(tags, 'tags', key_type=str)

    selector = check.opt_inst_param(selector, 'selector', ExecutionSelector)
    if not selector:
        selector = ExecutionSelector(pipeline_name)

    if not status:
        status = PipelineRunStatus.NOT_STARTED

    return super(PipelineRun, cls).__new__(
        cls,
        pipeline_name=check.str_param(pipeline_name, 'pipeline_name'),
        run_id=check.str_param(run_id, 'run_id'),
        environment_dict=check.opt_dict_param(environment_dict, 'environment_dict', key_type=str),
        mode=check.str_param(mode, 'mode'),
        selector=selector,
        step_keys_to_execute=None
        if step_keys_to_execute is None
        else check.list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str),
        status=status,
        tags=tags,
        root_run_id=check.opt_str_param(root_run_id, 'root_run_id'),
        parent_run_id=check.opt_str_param(parent_run_id, 'parent_run_id'),
        previous_run_id=check.opt_str_param(previous_run_id, 'previous_run_id'),
        pipeline_snapshot_id=check.opt_str_param(pipeline_snapshot_id, 'pipeline_snapshot_id'),
    )
def invoke_steps_within_python_operator(
    invocation_args, ts, dag_run, **kwargs
):  # pylint: disable=unused-argument
    mode = invocation_args.mode
    pipeline_name = invocation_args.pipeline_name
    step_keys = invocation_args.step_keys
    instance_ref = invocation_args.instance_ref
    environment_dict = invocation_args.environment_dict
    handle = invocation_args.handle
    pipeline_snapshot = invocation_args.pipeline_snapshot
    execution_plan_snapshot = invocation_args.execution_plan_snapshot

    run_id = dag_run.run_id
    variables = construct_variables(mode, environment_dict, pipeline_name, run_id, step_keys)
    variables = add_airflow_tags(variables, ts)

    logging.info(
        'Executing GraphQL query: {query}\n'.format(query=EXECUTE_PLAN_MUTATION)
        + 'with variables:\n'
        + seven.json.dumps(variables, indent=2)
    )

    instance = DagsterInstance.from_ref(instance_ref) if instance_ref else None
    if instance:
        instance.get_or_create_run(
            pipeline_name=pipeline_name,
            run_id=run_id,
            environment_dict=environment_dict,
            mode=mode,
            selector=ExecutionSelector(pipeline_name),
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.MANAGED,
            pipeline_snapshot=pipeline_snapshot,
            execution_plan_snapshot=execution_plan_snapshot,
        )

    events = execute_execute_plan_mutation(handle, variables, instance_ref=instance_ref)

    check_events_for_failures(events)
    check_events_for_skips(events)

    return events
def create_execution_params(graphene_info, graphql_execution_params):
    preset_name = graphql_execution_params.get('preset')
    if preset_name:
        check.invariant(
            not graphql_execution_params.get('environmentConfigData'),
            'Invalid ExecutionParams. Cannot define environment_dict when using preset',
        )
        check.invariant(
            not graphql_execution_params.get('mode'),
            'Invalid ExecutionParams. Cannot define mode when using preset',
        )

        selector = graphql_execution_params['selector'].to_selector()
        check.invariant(
            not selector.solid_subset,
            'Invalid ExecutionParams. Cannot define selector.solid_subset when using preset',
        )

        external_pipeline = get_external_pipeline_or_raise(graphene_info, selector.name)

        if not external_pipeline.has_preset(preset_name):
            raise UserFacingGraphQLError(
                graphene_info.schema.type_named('PresetNotFoundError')(
                    preset=preset_name, selector=selector
                )
            )

        preset = external_pipeline.get_preset(preset_name)

        return ExecutionParams(
            selector=ExecutionSelector(selector.name, preset.solid_subset),
            environment_dict=preset.environment_dict,
            mode=preset.mode,
            execution_metadata=create_execution_metadata(
                graphql_execution_params.get('executionMetadata')
            ),
            step_keys=graphql_execution_params.get('stepKeys'),
        )

    return execution_params_from_graphql(graphql_execution_params)
def execute_partition_set(partition_set, partition_filter, instance=None):
    '''Programmatically perform a backfill over a partition set.

    Arguments:
        partition_set (PartitionSet): The base partition set to run the backfill over
        partition_filter (Callable[[List[Partition]], List[Partition]]): A function that takes
            a list of partitions and returns a filtered list of partitions to run the backfill
            over.
        instance (DagsterInstance): The instance to use to perform the backfill
    '''
    check.inst_param(partition_set, 'partition_set', PartitionSetDefinition)
    check.callable_param(partition_filter, 'partition_filter')
    # instance may be None, so use the opt_ variant; we fall back to an ephemeral instance below
    check.opt_inst_param(instance, 'instance', DagsterInstance)

    candidate_partitions = partition_set.get_partitions()
    partitions = partition_filter(candidate_partitions)

    instance = instance or DagsterInstance.ephemeral()

    for partition in partitions:
        run = PipelineRun(
            pipeline_name=partition_set.pipeline_name,
            run_id=make_new_run_id(),
            selector=ExecutionSelector(partition_set.pipeline_name),
            environment_dict=partition_set.environment_dict_for_partition(partition),
            mode='default',
            tags=merge_dicts(
                PipelineRun.tags_for_backfill_id(make_new_backfill_id()),
                partition_set.tags_for_partition(partition),
            ),
            status=PipelineRunStatus.NOT_STARTED,
        )

        # Remove once we can handle synchronous execution... currently limited by sqlite
        time.sleep(0.1)
        instance.launch_run(run)
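# Hypothetical usage sketch (not part of the original source): backfilling the
# five most recent partitions of a partition set. `my_partition_set` and the
# slicing filter are illustrative assumptions.
execute_partition_set(
    partition_set=my_partition_set,
    partition_filter=lambda partitions: partitions[-5:],
    instance=DagsterInstance.get(),
)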