def in_process_executor(init_context):
    """The default in-process executor.

    In most Dagster environments, this will be the default executor. It is available by default on
    any :py:class:`ModeDefinition` that does not provide custom executors. To select it explicitly,
    include the following top-level fragment in config:

    .. code-block:: yaml

        execution:
          in_process:

    Execution priority can be configured using the ``dagster/priority`` tag via solid metadata,
    where the higher the number the higher the priority. 0 is the default and both positive
    and negative numbers can be used.
    """
    from dagster.core.executor.init import InitExecutorContext
    from dagster.core.executor.in_process import InProcessExecutor

    check.inst_param(init_context, "init_context", InitExecutorContext)

    return InProcessExecutor(
        # shouldn't need to .get() here - issue with defaults in config setup
        retries=Retries.from_config(init_context.executor_config.get("retries", {"enabled": {}})),
        marker_to_close=init_context.executor_config.get("marker_to_close"),
    )

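# Note: the ``dagster/priority`` tag described in the docstring above is attached to individual
# solids via their tags. A minimal sketch (the solid and pipeline names here are hypothetical,
# introduced only for illustration):
from dagster import pipeline, solid


@solid(tags={"dagster/priority": "3"})
def important_solid(_):
    pass


@solid(tags={"dagster/priority": "-1"})
def background_solid(_):
    pass


@pipeline
def prioritized_pipeline():
    important_solid()
    background_solid()
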
def multiprocess_executor(init_context):
    """The default multiprocess executor.

    This simple multiprocess executor is available by default on any :py:class:`ModeDefinition`
    that does not provide custom executors. To select the multiprocess executor, include a
    fragment such as the following in your config:

    .. code-block:: yaml

        execution:
          multiprocess:
            config:
              max_concurrent: 4

    The ``max_concurrent`` arg is optional and tells the execution engine how many processes may
    run concurrently. By default, or if you set ``max_concurrent`` to be 0, this is the return
    value of :py:func:`python:multiprocessing.cpu_count`.

    Execution priority can be configured using the ``dagster/priority`` tag via solid metadata,
    where the higher the number the higher the priority. 0 is the default and both positive
    and negative numbers can be used.
    """
    from dagster.core.executor.init import InitExecutorContext
    from dagster.core.executor.multiprocess import MultiprocessExecutor

    check.inst_param(init_context, "init_context", InitExecutorContext)
    check_cross_process_constraints(init_context)

    return MultiprocessExecutor(
        pipeline=init_context.pipeline,
        max_concurrent=init_context.executor_config["max_concurrent"],
        retries=Retries.from_config(init_context.executor_config["retries"]),
    )

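# Note: selecting the multiprocess executor then amounts to passing the YAML fragment from the
# docstring above as run config. A minimal sketch, assuming a hypothetical module-level pipeline
# factory ``define_my_pipeline``; depending on the Dagster version, multiprocess execution may
# also require filesystem-based storage and a persistent instance rather than in-memory defaults:
from dagster import DagsterInstance, execute_pipeline, reconstructable

result = execute_pipeline(
    reconstructable(define_my_pipeline),  # hypothetical pipeline factory
    run_config={"execution": {"multiprocess": {"config": {"max_concurrent": 4}}}},
    instance=DagsterInstance.local_temp(),
)
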
def execute_step_command(input_json):
    try:
        signal.signal(signal.SIGTERM, signal.getsignal(signal.SIGINT))
    except ValueError:
        warnings.warn(
            (
                "Unexpected error attempting to manage signal handling on thread {thread_name}. "
                "You should not invoke this API (execute_step) from threads "
                "other than the main thread."
            ).format(thread_name=threading.current_thread().name)
        )

    args = check.inst(deserialize_json_to_dagster_namedtuple(input_json), ExecuteStepArgs)

    with (
        DagsterInstance.from_ref(args.instance_ref) if args.instance_ref else DagsterInstance.get()
    ) as instance:
        pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
        check.inst(
            pipeline_run,
            PipelineRun,
            "Pipeline run with id '{}' not found for step execution".format(args.pipeline_run_id),
        )

        recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
        retries = Retries.from_config(args.retries_dict)

        if args.should_verify_step:
            success = verify_step(instance, pipeline_run, retries, args.step_keys_to_execute)
            if not success:
                return

        execution_plan = create_execution_plan(
            recon_pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute
            ),
            run_config=pipeline_run.run_config,
            step_keys_to_execute=args.step_keys_to_execute,
            mode=pipeline_run.mode,
        )

        buff = []

        # Flag that the step execution is skipped
        if should_skip_step(execution_plan, instance=instance, run_id=pipeline_run.run_id):
            click.echo(serialize_dagster_namedtuple(StepExecutionSkipped()))
            return

        for event in execute_plan_iterator(
            execution_plan,
            pipeline_run,
            instance,
            run_config=pipeline_run.run_config,
            retries=retries,
        ):
            buff.append(serialize_dagster_namedtuple(event))

        for line in buff:
            click.echo(line)

def initialize_step_context(scratch_dir):
    pipeline_run = PipelineRun(
        pipeline_name='foo_pipeline',
        run_id=str(uuid.uuid4()),
        run_config=make_run_config(scratch_dir, 'external'),
        mode='external',
    )

    plan = create_execution_plan(
        reconstructable(define_basic_pipeline), pipeline_run.run_config, mode='external'
    )

    initialization_manager = pipeline_initialization_manager(
        plan,
        pipeline_run.run_config,
        pipeline_run,
        DagsterInstance.ephemeral(),
    )
    for _ in initialization_manager.generate_setup_events():
        pass

    pipeline_context = initialization_manager.get_object()

    active_execution = plan.start(retries=Retries(RetryMode.DISABLED))
    step = active_execution.get_next_step()
    step_context = pipeline_context.for_step(step)
    return step_context

def execute_step_with_structured_logs_command(input_json):
    signal.signal(signal.SIGTERM, signal.getsignal(signal.SIGINT))

    args = check.inst(deserialize_json_to_dagster_namedtuple(input_json), ExecuteStepArgs)

    with (
        DagsterInstance.from_ref(args.instance_ref) if args.instance_ref else DagsterInstance.get()
    ) as instance:
        pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
        recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)

        execution_plan = create_execution_plan(
            recon_pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute
            ),
            run_config=args.run_config,
            step_keys_to_execute=args.step_keys_to_execute,
            mode=args.mode,
        )
        retries = Retries.from_config(args.retries_dict)

        buff = []
        for event in execute_plan_iterator(
            execution_plan,
            pipeline_run,
            instance,
            run_config=args.run_config,
            retries=retries,
        ):
            buff.append(serialize_dagster_namedtuple(event))

        for line in buff:
            click.echo(line)

def execute_plan_iterator(
    execution_plan,
    pipeline_run,
    instance,
    retries=None,
    run_config=None,
):
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)
    retries = check.opt_inst_param(retries, "retries", Retries, Retries.disabled_mode())
    run_config = check.opt_dict_param(run_config, "run_config")

    return iter(
        _ExecuteRunWithPlanIterable(
            execution_plan=execution_plan,
            iterator=inner_plan_execution_iterator,
            execution_context_manager=PlanExecutionContextManager(
                retries=retries,
                execution_plan=execution_plan,
                run_config=run_config,
                pipeline_run=pipeline_run,
                instance=instance,
                raise_on_error=False,
            ),
        )
    )

def test_retries_deferred_active_execution():
    pipeline_def = define_diamond_pipeline()
    plan = create_execution_plan(pipeline_def)
    active_execution = plan.start(retries=Retries(RetryMode.DEFERRED))

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 1
    step_1 = steps[0]
    assert step_1.key == 'return_two.compute'

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 0  # can't progress

    active_execution.mark_up_for_retry(step_1.key)

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 0  # can't progress, retries are deferred

    assert not active_execution.is_complete

    steps = active_execution.get_steps_to_skip()  # skip split of diamond
    assert len(steps) == 2
    _ = [active_execution.mark_skipped(step.key) for step in steps]

    assert not active_execution.is_complete

    steps = active_execution.get_steps_to_skip()  # skip end of diamond
    assert len(steps) == 1
    active_execution.mark_skipped(steps[0].key)

    assert active_execution.is_complete

def test_retries_active_execution():
    pipeline_def = define_diamond_pipeline()
    plan = create_execution_plan(pipeline_def)
    active_execution = plan.start(retries=Retries(RetryMode.ENABLED))

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 1
    step_1 = steps[0]
    assert step_1.key == 'return_two.compute'

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 0  # can't progress

    active_execution.mark_up_for_retry(step_1.key)

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 1
    assert steps[0].key == 'return_two.compute'

    active_execution.mark_up_for_retry(step_1.key)

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 1
    assert steps[0].key == 'return_two.compute'

    active_execution.mark_success(step_1.key)

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 2
    step_2 = steps[0]
    step_3 = steps[1]
    assert step_2.key == 'add_three.compute'
    assert step_3.key == 'mult_three.compute'

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 0  # can't progress

    active_execution.mark_success(step_2.key)

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 0  # can't progress

    # uh oh, failure
    active_execution.mark_failed(step_3.key)

    # can't progress to 4th step
    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 0

    assert not active_execution.is_complete

    steps = active_execution.get_steps_to_skip()
    assert len(steps) == 1
    step_4 = steps[0]

    assert step_4.key == 'adder.compute'
    active_execution.mark_skipped(step_4.key)

    assert active_execution.is_complete

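# Note: the tests above construct ``Retries`` directly from a ``RetryMode``, while the executors
# and CLI entry points in this module build the equivalent object from a config dict via
# ``Retries.from_config``. A small sketch of the assumed correspondence; the import path and the
# {"disabled": {}} / {"deferred": {}} config shapes are assumptions by analogy with the
# {"enabled": {}} form used in the executor definitions above:
from dagster.core.execution.retries import Retries, RetryMode  # import path assumed

enabled = Retries.from_config({"enabled": {}})    # same mode as Retries(RetryMode.ENABLED)
disabled = Retries.from_config({"disabled": {}})  # same mode as Retries.disabled_mode()
deferred = Retries.from_config({"deferred": {}})  # same mode as Retries(RetryMode.DEFERRED)
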
def _execute_plan(self, execute_step_args_packed, executable_dict):
    execute_step_args = unpack_value(
        check.dict_param(
            execute_step_args_packed,
            "execute_step_args_packed",
        )
    )
    check.inst_param(execute_step_args, "execute_step_args", ExecuteStepArgs)

    check.dict_param(executable_dict, "executable_dict")

    instance = DagsterInstance.from_ref(execute_step_args.instance_ref)
    pipeline = ReconstructablePipeline.from_dict(executable_dict)
    retries = Retries.from_config(execute_step_args.retries_dict)

    pipeline_run = instance.get_run_by_id(execute_step_args.pipeline_run_id)
    check.invariant(
        pipeline_run, "Could not load run {}".format(execute_step_args.pipeline_run_id)
    )

    step_keys_str = ", ".join(execute_step_args.step_keys_to_execute)

    execution_plan = create_execution_plan(
        pipeline,
        pipeline_run.run_config,
        mode=pipeline_run.mode,
        step_keys_to_execute=execute_step_args.step_keys_to_execute,
    )

    engine_event = instance.report_engine_event(
        "Executing steps {} in celery worker".format(step_keys_str),
        pipeline_run,
        EngineEventData(
            [
                EventMetadataEntry.text(step_keys_str, "step_keys"),
                EventMetadataEntry.text(self.request.hostname, "Celery worker"),
            ],
            marker_end=DELEGATE_MARKER,
        ),
        CeleryExecutor,
        step_key=execution_plan.step_key_for_single_step_plans(),
    )

    events = [engine_event]
    for step_event in execute_plan_iterator(
        execution_plan,
        pipeline_run=pipeline_run,
        run_config=pipeline_run.run_config,
        instance=instance,
        retries=retries,
    ):
        events.append(step_event)

    serialized_events = [serialize_dagster_namedtuple(event) for event in events]
    return serialized_events

def celery_executor(init_context):
    '''Celery-based executor.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings" introduced
    in Celery version 4.0 and the object constructed from config will be passed to the
    :py:class:`celery.Celery` constructor as its ``config_source`` argument.
    (See https://docs.celeryproject.org/en/latest/userguide/configuration.html for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments to the
    :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend`` (e.g., to use
    Redis instead of RabbitMQ). We expect that ``config_source`` will be less frequently modified,
    but that when solid executions are especially fast or slow, or when there are different
    requirements around idempotence or retry, it may make sense to execute pipelines with
    variations on these settings.

    If you'd like to configure a celery executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs`` defined on a
    :py:class:`~dagster.ModeDefinition` as follows:

    .. code-block:: python

        from dagster import ModeDefinition, default_executors, pipeline
        from dagster_celery import celery_executor

        @pipeline(mode_defs=[ModeDefinition(executor_defs=default_executors + [celery_executor])])
        def celery_enabled_pipeline():
            pass

    Then you can configure the executor as follows:

    .. code-block:: YAML

        execution:
          celery:
            config:
              broker: 'pyamqp://guest@localhost//'  # Optional[str]: The URL of the Celery broker
              backend: 'rpc://' # Optional[str]: The URL of the Celery results backend
              include: ['my_module'] # Optional[List[str]]: Modules every worker should import
              config_source: # Dict[str, Any]: Any additional parameters to pass to the
                  #...       # Celery workers. This dict will be passed as the `config_source`
                  #...       # argument of celery.Celery().

    Note that the YAML you provide here must align with the configuration with which the Celery
    workers on which you hope to run were started. If, for example, you point the executor at a
    different broker than the one your workers are listening to, the workers will never be able to
    pick up tasks for execution.
    '''
    check_cross_process_constraints(init_context)

    return CeleryConfig(
        broker=init_context.executor_config.get('broker'),
        backend=init_context.executor_config.get('backend'),
        config_source=init_context.executor_config.get('config_source'),
        include=init_context.executor_config.get('include'),
        retries=Retries.from_config(init_context.executor_config['retries']),
    )

def for_cli(broker=None, backend=None, include=None, config_source=None):
    return CeleryConfig(
        retries=Retries(RetryMode.DISABLED),
        broker=broker,
        backend=backend,
        include=include,
        config_source=config_source,
    )

def test_executor(init_context):
    from dagster.core.executor.in_process import InProcessExecutor

    assert init_context.executor_config["value"] == "secret testing value!!"

    return InProcessExecutor(
        # shouldn't need to .get() here - issue with defaults in config setup
        retries=Retries.from_config({"enabled": {}}),
        marker_to_close=None,
    )

def test_incomplete_execution_plan():
    plan = create_execution_plan(define_diamond_pipeline())

    with pytest.raises(DagsterIncompleteExecutionPlanError):
        with plan.start(retries=Retries(RetryMode.DISABLED)) as active_execution:
            steps = active_execution.get_steps_to_execute()
            assert len(steps) == 1
            step_1 = steps[0]
            active_execution.mark_success(step_1.key)

def _execute_plan(_self, instance_ref_dict, handle_dict, run_id, step_keys, retries_dict):
    check.dict_param(instance_ref_dict, 'instance_ref_dict')
    check.dict_param(handle_dict, 'handle_dict')
    check.str_param(run_id, 'run_id')
    check.list_param(step_keys, 'step_keys', of_type=str)
    check.dict_param(retries_dict, 'retries_dict')

    instance_ref = InstanceRef.from_dict(instance_ref_dict)
    instance = DagsterInstance.from_ref(instance_ref)
    handle = ExecutionTargetHandle.from_dict(handle_dict)
    retries = Retries.from_config(retries_dict)

    pipeline_run = instance.get_run_by_id(run_id)
    check.invariant(pipeline_run, 'Could not load run {}'.format(run_id))

    pipeline_def = handle.build_pipeline_definition().build_sub_pipeline(
        pipeline_run.selector.solid_subset
    )

    step_keys_str = ", ".join(step_keys)

    execution_plan = create_execution_plan(
        pipeline_def,
        pipeline_run.environment_dict,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    ).build_subset_plan(step_keys)

    engine_event = instance.report_engine_event(
        'Executing steps {} in celery worker'.format(step_keys_str),
        pipeline_run,
        EngineEventData(
            [
                EventMetadataEntry.text(step_keys_str, 'step_keys'),
            ],
            marker_end=DELEGATE_MARKER,
        ),
        CeleryEngine,
        step_key=execution_plan.step_key_for_single_step_plans(),
    )

    events = [engine_event]
    for step_event in execute_plan_iterator(
        execution_plan,
        pipeline_run=pipeline_run,
        environment_dict=pipeline_run.environment_dict,
        instance=instance,
        retries=retries,
    ):
        events.append(step_event)

    serialized_events = [serialize_dagster_namedtuple(event) for event in events]
    return serialized_events

def build_flyte_sdk_workflow(self):
    ordered_step_dict = self.execution_plan.execution_deps()
    instance = DagsterInstance.ephemeral()

    pipeline_run = instance.create_run(
        pipeline_name=self.execution_plan.pipeline_def.display_name,
        run_id=self.execution_plan.pipeline_def.display_name,
        run_config=self.run_config,
        mode=None,
        solids_to_execute=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
        pipeline_snapshot=self.execution_plan.pipeline_def.get_pipeline_snapshot(),
        execution_plan_snapshot=snapshot_from_execution_plan(
            self.execution_plan,
            self.execution_plan.pipeline_def.get_pipeline_snapshot_id(),
        ),
        parent_pipeline_snapshot=self.execution_plan.pipeline_def.get_parent_pipeline_snapshot(),
    )

    initialization_manager = PlanExecutionContextManager(
        Retries.disabled_mode(),
        self.execution_plan,
        self.run_config,
        instance.get_run_by_id(self.execution_plan.pipeline_def.display_name),
        instance,
    )
    list(initialization_manager.prepare_context())
    pipeline_context = initialization_manager.get_context()

    for step_key in ordered_step_dict:
        solid_name = self.execution_plan.get_step_by_key(step_key).solid_name
        self.sdk_node_dict[solid_name] = self.get_sdk_node(
            pipeline_context,
            instance,
            pipeline_run,
            step_key,
            storage_request=self.compute_dict[solid_name].get("storage_request", None),
            cpu_request=self.compute_dict[solid_name].get("cpu_request", None),
            memory_request=self.compute_dict[solid_name].get("memory_request", None),
            storage_limit=self.compute_dict[solid_name].get("storage_limit", None),
            cpu_limit=self.compute_dict[solid_name].get("cpu_limit", None),
            memory_limit=self.compute_dict[solid_name].get("memory_limit", None),
        )

def execute_step_command(input_json):
    with capture_interrupts():
        args = check.inst(deserialize_json_to_dagster_namedtuple(input_json), ExecuteStepArgs)

        with (
            DagsterInstance.from_ref(args.instance_ref)
            if args.instance_ref
            else DagsterInstance.get()
        ) as instance:
            pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
            check.inst(
                pipeline_run,
                PipelineRun,
                "Pipeline run with id '{}' not found for step execution".format(
                    args.pipeline_run_id
                ),
            )

            recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
            retries = Retries.from_config(args.retries_dict)

            if args.should_verify_step:
                success = verify_step(instance, pipeline_run, retries, args.step_keys_to_execute)
                if not success:
                    return

            execution_plan = create_execution_plan(
                recon_pipeline.subset_for_execution_from_existing_pipeline(
                    pipeline_run.solids_to_execute
                ),
                run_config=pipeline_run.run_config,
                step_keys_to_execute=args.step_keys_to_execute,
                mode=pipeline_run.mode,
            )

            buff = []

            # Flag that the step execution is skipped
            if should_skip_step(execution_plan, instance=instance, run_id=pipeline_run.run_id):
                click.echo(serialize_dagster_namedtuple(StepExecutionSkipped()))
                return

            for event in execute_plan_iterator(
                execution_plan,
                pipeline_run,
                instance,
                run_config=pipeline_run.run_config,
                retries=retries,
            ):
                buff.append(serialize_dagster_namedtuple(event))

            for line in buff:
                click.echo(line)

def _execute_plan(_self, instance_ref_dict, executable_dict, run_id, step_keys, retries_dict):
    check.dict_param(instance_ref_dict, "instance_ref_dict")
    check.dict_param(executable_dict, "executable_dict")
    check.str_param(run_id, "run_id")
    check.list_param(step_keys, "step_keys", of_type=str)
    check.dict_param(retries_dict, "retries_dict")

    instance_ref = InstanceRef.from_dict(instance_ref_dict)
    instance = DagsterInstance.from_ref(instance_ref)
    pipeline = ReconstructablePipeline.from_dict(executable_dict)
    retries = Retries.from_config(retries_dict)

    pipeline_run = instance.get_run_by_id(run_id)
    check.invariant(pipeline_run, "Could not load run {}".format(run_id))

    step_keys_str = ", ".join(step_keys)

    execution_plan = create_execution_plan(
        pipeline,
        pipeline_run.run_config,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    ).build_subset_plan(step_keys)

    engine_event = instance.report_engine_event(
        "Executing steps {} in celery worker".format(step_keys_str),
        pipeline_run,
        EngineEventData(
            [
                EventMetadataEntry.text(step_keys_str, "step_keys"),
            ],
            marker_end=DELEGATE_MARKER,
        ),
        CeleryExecutor,
        step_key=execution_plan.step_key_for_single_step_plans(),
    )

    events = [engine_event]
    for step_event in execute_plan_iterator(
        execution_plan,
        pipeline_run=pipeline_run,
        run_config=pipeline_run.run_config,
        instance=instance,
        retries=retries,
    ):
        events.append(step_event)

    serialized_events = [serialize_dagster_namedtuple(event) for event in events]
    return serialized_events

def test_incomplete_execution_plan():
    plan = create_execution_plan(define_diamond_pipeline())

    with pytest.raises(
        DagsterInvariantViolationError,
        match="Execution of pipeline finished without completing the execution plan.",
    ):
        with plan.start(retries=Retries(RetryMode.DISABLED)) as active_execution:
            steps = active_execution.get_steps_to_execute()
            assert len(steps) == 1
            step_1 = steps[0]
            active_execution.mark_success(step_1.key)

def test_failing_execution_plan():
    pipeline_def = define_diamond_pipeline()
    plan = create_execution_plan(pipeline_def)

    with plan.start(retries=Retries(RetryMode.DISABLED)) as active_execution:
        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 1
        step_1 = steps[0]
        assert step_1.key == "return_two"

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        active_execution.mark_success(step_1.key)
        active_execution.mark_step_produced_output(StepOutputHandle(step_1.key, "result"))

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 2
        step_2 = steps[0]
        step_3 = steps[1]
        assert step_2.key == "add_three"
        assert step_3.key == "mult_three"

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        active_execution.mark_success(step_2.key)
        active_execution.mark_step_produced_output(StepOutputHandle(step_2.key, "result"))

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        # uh oh, failure
        active_execution.mark_failed(step_3.key)
        active_execution.mark_step_produced_output(StepOutputHandle(step_3.key, "result"))

        # can't progress to 4th step
        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0

        assert not active_execution.is_complete

        steps = active_execution.get_steps_to_abandon()
        assert len(steps) == 1
        step_4 = steps[0]

        assert step_4.key == "adder"
        active_execution.mark_abandoned(step_4.key)

        assert active_execution.is_complete

def test_retry_deferral():
    events = execute_plan(
        create_execution_plan(define_retry_limit_pipeline()),
        pipeline_run=PipelineRun(pipeline_name='retry_limits', run_id='42'),
        retries=Retries(RetryMode.DEFERRED),
        instance=DagsterInstance.local_temp(),
    )
    events_by_type = defaultdict(list)
    for ev in events:
        events_by_type[ev.event_type].append(ev)

    assert len(events_by_type[DagsterEventType.STEP_START]) == 2
    assert len(events_by_type[DagsterEventType.STEP_UP_FOR_RETRY]) == 2
    # check against the grouped event types, not the raw event list
    assert DagsterEventType.STEP_RESTARTED not in events_by_type
    assert DagsterEventType.STEP_SUCCESS not in events_by_type

def test_retries_disabled_active_execution():
    pipeline_def = define_diamond_pipeline()
    plan = create_execution_plan(pipeline_def)
    active_execution = plan.start(retries=Retries(RetryMode.DISABLED))

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 1
    step_1 = steps[0]
    assert step_1.key == "return_two.compute"

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 0  # can't progress

    with pytest.raises(check.CheckError):
        active_execution.mark_up_for_retry(step_1.key)

def test_priorities():
    @solid(tags={"priority": 5})
    def pri_5(_):
        pass

    @solid(tags={"priority": 4})
    def pri_4(_):
        pass

    @solid(tags={"priority": 3})
    def pri_3(_):
        pass

    @solid(tags={"priority": 2})
    def pri_2(_):
        pass

    @solid(tags={"priority": -1})
    def pri_neg_1(_):
        pass

    @solid
    def pri_none(_):
        pass

    @pipeline
    def priorities():
        pri_neg_1()
        pri_3()
        pri_2()
        pri_none()
        pri_5()
        pri_4()

    sort_key_fn = lambda step: int(step.tags.get("priority", 0)) * -1

    plan = create_execution_plan(priorities)
    with plan.start(Retries(RetryMode.DISABLED), sort_key_fn) as active_execution:
        steps = active_execution.get_steps_to_execute()
        assert steps[0].key == "pri_5.compute"
        assert steps[1].key == "pri_4.compute"
        assert steps[2].key == "pri_3.compute"
        assert steps[3].key == "pri_2.compute"
        assert steps[4].key == "pri_none.compute"
        assert steps[5].key == "pri_neg_1.compute"

        _ = [active_execution.mark_skipped(step.key) for step in steps]

def __init__(
    self,
    instance_config_map,
    dagster_home,
    postgres_password_secret,
    load_incluster_config=True,
    kubeconfig_file=None,
    broker=None,
    backend=None,
    include=None,
    config_source=None,
    retries=None,
    inst_data=None,
    k8s_client_batch_api=None,
):
    self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)

    if load_incluster_config:
        check.invariant(
            kubeconfig_file is None,
            "`kubeconfig_file` is set but `load_incluster_config` is True.",
        )
        kubernetes.config.load_incluster_config()
    else:
        check.opt_str_param(kubeconfig_file, "kubeconfig_file")
        kubernetes.config.load_kube_config(kubeconfig_file)

    self._batch_api = k8s_client_batch_api or kubernetes.client.BatchV1Api()

    self.instance_config_map = check.str_param(instance_config_map, "instance_config_map")
    self.dagster_home = check.str_param(dagster_home, "dagster_home")
    self.postgres_password_secret = check.str_param(
        postgres_password_secret, "postgres_password_secret"
    )
    self.broker = check.opt_str_param(broker, "broker")
    self.backend = check.opt_str_param(backend, "backend")
    self.include = check.opt_list_param(include, "include")
    self.config_source = check.opt_dict_param(config_source, "config_source")

    retries = check.opt_dict_param(retries, "retries") or {"enabled": {}}
    self.retries = Retries.from_config(retries)

    super().__init__()

def test_active_execution_plan():
    plan = create_execution_plan(define_diamond_pipeline())

    with plan.start(retries=Retries(RetryMode.DISABLED)) as active_execution:
        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 1
        step_1 = steps[0]
        assert step_1.key == "return_two.compute"

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        active_execution.mark_success(step_1.key)

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 2
        step_2 = steps[0]
        step_3 = steps[1]
        assert step_2.key == "add_three.compute"
        assert step_3.key == "mult_three.compute"

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        active_execution.mark_success(step_2.key)

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        active_execution.mark_success(step_3.key)

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 1
        step_4 = steps[0]

        assert step_4.key == "adder.compute"

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        assert not active_execution.is_complete

        active_execution.mark_success(step_4.key)

        assert active_execution.is_complete

def test_priorities():
    @solid(tags={'priority': 5})
    def pri_5(_):
        pass

    @solid(tags={'priority': 4})
    def pri_4(_):
        pass

    @solid(tags={'priority': 3})
    def pri_3(_):
        pass

    @solid(tags={'priority': 2})
    def pri_2(_):
        pass

    @solid(tags={'priority': -1})
    def pri_neg_1(_):
        pass

    @solid
    def pri_none(_):
        pass

    @pipeline
    def priorities():
        pri_neg_1()
        pri_3()
        pri_2()
        pri_none()
        pri_5()
        pri_4()

    sort_key_fn = lambda step: int(step.tags.get('priority', 0)) * -1

    plan = create_execution_plan(priorities)
    active_execution = plan.start(Retries(RetryMode.DISABLED), sort_key_fn)
    steps = active_execution.get_steps_to_execute()

    assert steps[0].key == 'pri_5.compute'
    assert steps[1].key == 'pri_4.compute'
    assert steps[2].key == 'pri_3.compute'
    assert steps[3].key == 'pri_2.compute'
    assert steps[4].key == 'pri_none.compute'
    assert steps[5].key == 'pri_neg_1.compute'

def execute_plan_iterator(
    execution_plan,
    pipeline_run,
    instance,
    retries=None,
    environment_dict=None,
):
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
    check.inst_param(instance, 'instance', DagsterInstance)
    retries = check.opt_inst_param(retries, 'retries', Retries, Retries.disabled_mode())
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict')

    initialization_manager = pipeline_initialization_manager(
        execution_plan.pipeline_def,
        environment_dict,
        pipeline_run,
        instance,
        execution_plan,
    )
    for event in initialization_manager.generate_setup_events():
        yield event

    pipeline_context = initialization_manager.get_object()

    generator_closed = False
    try:
        if pipeline_context:
            for event in inner_plan_execution_iterator(
                pipeline_context, execution_plan=execution_plan, retries=retries
            ):
                yield event
    except GeneratorExit:
        # Shouldn't happen, but avoid runtime-exception in case this generator gets GC-ed
        # (see https://amir.rachum.com/blog/2017/03/03/generator-cleanup/).
        generator_closed = True
        raise
    finally:
        for event in initialization_manager.generate_teardown_events():
            if not generator_closed:
                yield event

def execute_plan_iterator(
    execution_plan,
    pipeline_run,
    instance,
    retries=None,
    run_config=None,
):
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
    check.inst_param(instance, 'instance', DagsterInstance)
    retries = check.opt_inst_param(retries, 'retries', Retries, Retries.disabled_mode())
    run_config = check.opt_dict_param(run_config, 'run_config')

    return iter(
        _ExecuteRunWithPlanIterable(
            execution_plan=execution_plan,
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
            retries=retries,
            iterator=inner_plan_execution_iterator,
            raise_on_error=False,
        )
    )

def __init__(
    self,
    instance_config_map,
    dagster_home,
    postgres_password_secret,
    load_incluster_config=True,
    kubeconfig_file=None,
    broker=None,
    backend=None,
    include=None,
    config_source=None,
    retries=None,
    inst_data=None,
):
    self._inst_data = check.opt_inst_param(inst_data, 'inst_data', ConfigurableClassData)

    if load_incluster_config:
        check.invariant(
            kubeconfig_file is None,
            '`kubeconfig_file` is set but `load_incluster_config` is True.',
        )
        kubernetes.config.load_incluster_config()
    else:
        check.opt_str_param(kubeconfig_file, 'kubeconfig_file')
        kubernetes.config.load_kube_config(kubeconfig_file)

    self.instance_config_map = check.str_param(instance_config_map, 'instance_config_map')
    self.dagster_home = check.str_param(dagster_home, 'dagster_home')
    self.postgres_password_secret = check.str_param(
        postgres_password_secret, 'postgres_password_secret'
    )
    self.broker = check.opt_str_param(broker, 'broker')
    self.backend = check.opt_str_param(backend, 'backend')
    self.include = check.opt_list_param(include, 'include')
    self.config_source = check.opt_dict_param(config_source, 'config_source')

    retries = check.opt_dict_param(retries, 'retries') or {'enabled': {}}
    self.retries = Retries.from_config(retries)

    self._instance_ref = None

def test_lost_steps():
    plan = create_execution_plan(define_diamond_pipeline())

    # run to completion - but step was in unknown state so exception thrown
    with pytest.raises(DagsterUnknownStepStateError):
        with plan.start(retries=Retries(RetryMode.DISABLED)) as active_execution:
            steps = active_execution.get_steps_to_execute()
            assert len(steps) == 1
            step_1 = steps[0]

            # called by verify_complete when success / fail event not observed
            active_execution.mark_unknown_state(step_1.key)

            # failure assumed for start step - so rest should skip
            steps_to_abandon = active_execution.get_steps_to_abandon()
            while steps_to_abandon:
                _ = [active_execution.mark_abandoned(step.key) for step in steps_to_abandon]
                steps_to_abandon = active_execution.get_steps_to_abandon()

            assert active_execution.is_complete

def execute_step_with_structured_logs_command(input_json):
    try:
        signal.signal(signal.SIGTERM, signal.getsignal(signal.SIGINT))
    except ValueError:
        warnings.warn(
            (
                "Unexpected error attempting to manage signal handling on thread {thread_name}. "
                "You should not invoke this API (execute_step_with_structured_logs) from threads "
                "other than the main thread."
            ).format(thread_name=threading.current_thread().name)
        )

    args = check.inst(deserialize_json_to_dagster_namedtuple(input_json), ExecuteStepArgs)

    with (
        DagsterInstance.from_ref(args.instance_ref) if args.instance_ref else DagsterInstance.get()
    ) as instance:
        pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
        recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)

        execution_plan = create_execution_plan(
            recon_pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute
            ),
            run_config=args.run_config,
            step_keys_to_execute=args.step_keys_to_execute,
            mode=args.mode,
        )
        retries = Retries.from_config(args.retries_dict)

        buff = []
        for event in execute_plan_iterator(
            execution_plan,
            pipeline_run,
            instance,
            run_config=args.run_config,
            retries=retries,
        ):
            buff.append(serialize_dagster_namedtuple(event))

        for line in buff:
            click.echo(line)