def launch_step(self, step_handler_context: StepHandlerContext):
    events = []

    assert (
        len(step_handler_context.execute_step_args.step_keys_to_execute) == 1
    ), "Launching multiple steps is not currently supported"
    step_key = step_handler_context.execute_step_args.step_keys_to_execute[0]

    k8s_name_key = get_k8s_job_name(
        step_handler_context.execute_step_args.pipeline_run_id,
        step_key,
    )
    job_name = "dagster-job-%s" % k8s_name_key
    pod_name = "dagster-job-%s" % k8s_name_key

    input_json = serialize_dagster_namedtuple(step_handler_context.execute_step_args)
    args = ["dagster", "api", "execute_step", input_json]

    job_config = self._job_config
    if not job_config.job_image:
        job_config = job_config.with_image(
            step_handler_context.execute_step_args.pipeline_origin.repository_origin.container_image
        )

    if not job_config.job_image:
        raise Exception("No image included in either executor config or the pipeline")

    user_defined_k8s_config = get_user_defined_k8s_config(
        frozentags(step_handler_context.step_tags[step_key])
    )

    job = construct_dagster_k8s_job(
        job_config=job_config,
        args=args,
        job_name=job_name,
        pod_name=pod_name,
        component="step_worker",
        user_defined_k8s_config=user_defined_k8s_config,
    )

    events.append(
        DagsterEvent(
            event_type_value=DagsterEventType.ENGINE_EVENT.value,
            pipeline_name=step_handler_context.execute_step_args.pipeline_origin.pipeline_name,
            step_key=step_key,
            message=f"Executing step {step_key} in Kubernetes job {job_name}",
            event_specific_data=EngineEventData(
                [
                    EventMetadataEntry.text(step_key, "Step key"),
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                ],
            ),
        )
    )

    self._batch_api.create_namespaced_job(body=job, namespace=self._job_namespace)

    return events
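# Illustration only (not part of the handler above): the user_defined_k8s_config pulled from
# step_tags comes from a per-step tag. Assuming the tag key consumed by
# get_user_defined_k8s_config is "dagster-k8s/config" (an assumption here; values shown are
# placeholders), a step tagged roughly like this would have custom resources applied to the
# container of its step worker job:
EXAMPLE_STEP_TAGS = {
    "dagster-k8s/config": {
        "container_config": {
            "resources": {
                "requests": {"cpu": "250m", "memory": "64Mi"},
                "limits": {"cpu": "500m", "memory": "256Mi"},
            }
        }
    }
}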
def launch_step(self, step_handler_context: StepHandlerContext) -> List[DagsterEvent]:
    client = self._get_client()

    step_image = (
        step_handler_context.execute_step_args.pipeline_origin.repository_origin.container_image
    )

    if not step_image:
        step_image = self._image

    if not step_image:
        raise Exception("No docker image specified by the executor config or repository")

    validate_docker_image(step_image)

    try:
        step_container = self._create_step_container(
            client, step_image, step_handler_context.execute_step_args
        )
    except docker.errors.ImageNotFound:
        client.images.pull(step_image)
        step_container = self._create_step_container(
            client, step_image, step_handler_context.execute_step_args
        )

    if len(self._networks) > 1:
        for network_name in self._networks[1:]:
            network = client.networks.get(network_name)
            network.connect(step_container)

    assert (
        len(step_handler_context.execute_step_args.step_keys_to_execute) == 1
    ), "Launching multiple steps is not currently supported"
    step_key = step_handler_context.execute_step_args.step_keys_to_execute[0]

    events = [
        DagsterEvent(
            event_type_value=DagsterEventType.ENGINE_EVENT.value,
            pipeline_name=step_handler_context.execute_step_args.pipeline_origin.pipeline_name,
            step_key=step_key,
            message="Launching step in Docker container",
            event_specific_data=EngineEventData(
                [
                    EventMetadataEntry.text(step_key, "Step key"),
                    EventMetadataEntry.text(step_container.id, "Docker container id"),
                ],
            ),
        )
    ]

    step_container.start()

    return events
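# Sketch of an assumed config shape for the Docker step handler above. The Docker API only
# attaches a single network when a container is created, which is presumably why
# _create_step_container uses the first entry and the loop above connects the remaining
# networks afterwards. The image and network names below are placeholders.
EXAMPLE_DOCKER_STEP_HANDLER_CONFIG = {
    # Falls back to the pipeline origin's container_image when unset
    "image": "my-org/my-pipeline:latest",
    # First network is used at container creation; the rest are connected after
    "networks": ["dagster_network", "postgres_network"],
}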
def launch_run(self, instance, run, external_pipeline):
    if isinstance(
        external_pipeline.get_external_origin().external_repository_origin.repository_location_origin,
        GrpcServerRepositoryLocationOrigin,
    ):
        repository_location_handle = (
            external_pipeline.repository_handle.repository_location_handle
        )

        if not isinstance(repository_location_handle, GrpcServerRepositoryLocationHandle):
            raise DagsterInvariantViolationError(
                "Expected RepositoryLocationHandle to be of type "
                "GrpcServerRepositoryLocationHandle but found type {}".format(
                    type(repository_location_handle)
                )
            )

        repository_name = external_pipeline.repository_handle.repository_name
        location_name = (
            external_pipeline.repository_handle.repository_location_handle.location_name
        )
        pipeline_origin = PipelinePythonOrigin(
            pipeline_name=external_pipeline.name,
            repository_origin=repository_location_handle.get_repository_python_origin(
                repository_name
            ),
        )
    else:
        location_name = "local"
        pipeline_origin = external_pipeline.get_python_origin()

    input_json = serialize_dagster_namedtuple(
        ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=None,
        )
    )

    # Dispatch the run to a location-specific Celery queue
    app = self._get_app(location_name)
    sig = app.signature("launch_run", args=(input_json,), queue=f"{location_name}-pipelines")
    result = sig.delay()

    instance.report_engine_event(
        "Started Celery task for pipeline (task id: {result.id}).".format(result=result),
        run,
        EngineEventData(
            metadata_entries=[
                EventMetadataEntry.text(result.id, "task_id"),
            ]
        ),
    )

    return run
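# A minimal worker-side sketch (not the actual Dagster worker code) of the task that the
# signature above targets: a Celery task registered under the name "launch_run", consumed from
# the "<location_name>-pipelines" queue, receiving the serialized ExecuteRunArgs JSON. The app
# name and the task body are assumptions for illustration only.
from celery import Celery

from dagster.serdes import deserialize_json_to_dagster_namedtuple

worker_app = Celery("dagster_pipeline_worker")  # hypothetical worker app


@worker_app.task(name="launch_run")
def launch_run_task(input_json):
    # Recover the ExecuteRunArgs namedtuple sent by the launcher
    execute_run_args = deserialize_json_to_dagster_namedtuple(input_json)
    # ... execute the run against the worker's DagsterInstance (details omitted)
    return execute_run_args.pipeline_run_id


# The worker would be started against the matching queue, e.g.:
#   celery -A <worker module> worker -Q local-pipelines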
def launch_run(self, context: LaunchRunContext) -> None:
    """
    Launch a run in an ECS task.

    Currently, Fargate is the only supported launchType and awsvpc is the only
    supported networkMode. These are the defaults that are set up by
    docker-compose when you use the Dagster ECS reference deployment.
    """
    run = context.pipeline_run
    metadata = self._task_metadata()
    pipeline_origin = context.pipeline_code_origin
    image = pipeline_origin.repository_origin.container_image
    task_definition = self._task_definition(metadata, image)["family"]

    args = ExecuteRunArgs(
        pipeline_origin=pipeline_origin,
        pipeline_run_id=run.run_id,
        instance_ref=self._instance.get_ref(),
    )
    command = args.get_command_args()

    # Set cpu or memory overrides
    # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html
    overrides = {}
    tags = self._get_run_tags(run.run_id)
    if tags.cpu:
        overrides["cpu"] = tags.cpu
    if tags.memory:
        overrides["memory"] = tags.memory

    # Run a task using the same network configuration as this process's task.
    response = self.ecs.run_task(
        taskDefinition=task_definition,
        cluster=metadata.cluster,
        overrides={
            "containerOverrides": [{"name": self.container_name, "command": command}],
            **overrides,
        },
        networkConfiguration={
            "awsvpcConfiguration": {
                "subnets": metadata.subnets,
                "assignPublicIp": metadata.assign_public_ip,
                "securityGroups": metadata.security_groups,
            }
        },
        launchType="FARGATE",
    )

    arn = response["tasks"][0]["taskArn"]
    self._set_run_tags(run.run_id, task_arn=arn)
    self._set_ecs_tags(run.run_id, task_arn=arn)
    self._instance.report_engine_event(
        message="Launching run in ECS task",
        pipeline_run=run,
        engine_event_data=EngineEventData(
            [
                EventMetadataEntry.text(arn, "ECS Task ARN"),
                EventMetadataEntry.text(metadata.cluster, "ECS Cluster"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]
        ),
        cls=self.__class__,
    )
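# Illustration only: when _get_run_tags resolves cpu/memory values for the run (the tag keys are
# assumed here to be "ecs/cpu" and "ecs/memory"), the overrides payload handed to ecs.run_task
# above takes roughly this shape. The command comes from ExecuteRunArgs.get_command_args(); the
# serialized-args placeholder and numeric values are illustrative.
EXAMPLE_RUN_TASK_OVERRIDES = {
    "containerOverrides": [
        {
            "name": "run",
            "command": ["dagster", "api", "execute_run", "<serialized ExecuteRunArgs>"],
        },
    ],
    "cpu": "512",      # task-level override, only present when the run carries a cpu tag
    "memory": "1024",  # task-level override, only present when the run carries a memory tag
}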
def execute(self, plan_context: PlanOrchestrationContext, execution_plan: ExecutionPlan):
    check.inst_param(plan_context, "plan_context", PlanOrchestrationContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

    self._event_cursor = -1  # pylint: disable=attribute-defined-outside-init

    yield DagsterEvent.engine_event(
        plan_context,
        f"Starting execution with step handler {self._step_handler.name}",
        EngineEventData(),
    )

    with execution_plan.start(retry_mode=self.retries) as active_execution:
        running_steps: Dict[str, ExecutionStep] = {}

        if plan_context.resume_from_failure:
            yield DagsterEvent.engine_event(
                plan_context,
                "Resuming execution from failure",
                EngineEventData(),
            )

            prior_events = self._pop_events(
                plan_context.instance,
                plan_context.run_id,
            )
            for dagster_event in prior_events:
                yield dagster_event

            possibly_in_flight_steps = active_execution.rebuild_from_events(prior_events)
            for step in possibly_in_flight_steps:
                yield DagsterEvent.engine_event(
                    plan_context,
                    "Checking on status of possibly launched steps",
                    EngineEventData(),
                    step.handle,
                )

                # TODO: check if failure event included. For now, hacky assumption that
                # we don't log anything on successful check
                if self._step_handler.check_step_health(
                    self._get_step_handler_context(plan_context, [step], active_execution)
                ):
                    # health check failed, launch the step
                    self._log_new_events(
                        self._step_handler.launch_step(
                            self._get_step_handler_context(
                                plan_context, [step], active_execution
                            )
                        ),
                        plan_context,
                        {step.key: step for step in possibly_in_flight_steps},
                    )

                running_steps[step.key] = step

        last_check_step_health_time = pendulum.now("UTC")
        # Order of events is important here. During an iteration, we call handle_event, then
        # get_steps_to_execute, then is_complete. get_steps_to_execute updates the state of
        # ActiveExecution, and without it is_complete can return true when we're just between
        # steps.
        while not active_execution.is_complete:

            if active_execution.check_for_interrupts():
                if not plan_context.instance.run_will_resume(plan_context.run_id):
                    yield DagsterEvent.engine_event(
                        plan_context,
                        "Executor received termination signal, forwarding to steps",
                        EngineEventData.interrupted(list(running_steps.keys())),
                    )
                    active_execution.mark_interrupted()
                    for _, step in running_steps.items():
                        self._log_new_events(
                            self._step_handler.terminate_step(
                                self._get_step_handler_context(
                                    plan_context, [step], active_execution
                                )
                            ),
                            plan_context,
                            running_steps,
                        )
                else:
                    yield DagsterEvent.engine_event(
                        plan_context,
                        "Executor received termination signal, not forwarding to steps because "
                        "run will be resumed",
                        EngineEventData(
                            metadata_entries=[
                                EventMetadataEntry.text(
                                    str(running_steps.keys()), "steps_in_flight"
                                )
                            ]
                        ),
                    )
                    active_execution.mark_interrupted()

                return

            for dagster_event in self._pop_events(
                plan_context.instance,
                plan_context.run_id,
            ):  # type: ignore
                # STEP_SKIPPED events are only emitted by ActiveExecution, which already handles
                # and yields them.
                if dagster_event.is_step_skipped:
                    assert isinstance(dagster_event.step_key, str)
                    active_execution.verify_complete(plan_context, dagster_event.step_key)
                else:
                    yield dagster_event
                    active_execution.handle_event(dagster_event)
                    if dagster_event.is_step_success or dagster_event.is_step_failure:
                        assert isinstance(dagster_event.step_key, str)
                        del running_steps[dagster_event.step_key]
                        active_execution.verify_complete(plan_context, dagster_event.step_key)

            # process skips from failures or uncovered inputs
            for event in active_execution.plan_events_iterator(plan_context):
                yield event

            curr_time = pendulum.now("UTC")
            if (
                curr_time - last_check_step_health_time
            ).total_seconds() >= self._check_step_health_interval_seconds:
                last_check_step_health_time = curr_time
                for _, step in running_steps.items():
                    self._log_new_events(
                        self._step_handler.check_step_health(
                            self._get_step_handler_context(
                                plan_context, [step], active_execution
                            )
                        ),
                        plan_context,
                        running_steps,
                    )

            for step in active_execution.get_steps_to_execute():
                running_steps[step.key] = step
                self._log_new_events(
                    self._step_handler.launch_step(
                        self._get_step_handler_context(plan_context, [step], active_execution)
                    ),
                    plan_context,
                    running_steps,
                )

            time.sleep(self._sleep_seconds)
def create_valid_pipeline_run(graphene_info, external_pipeline, execution_params):
    ensure_valid_config(external_pipeline, execution_params.mode, execution_params.run_config)

    step_keys_to_execute = compute_step_keys_to_execute(
        graphene_info, external_pipeline, execution_params
    )

    external_execution_plan = get_external_execution_plan_or_raise(
        graphene_info=graphene_info,
        external_pipeline=external_pipeline,
        mode=execution_params.mode,
        run_config=execution_params.run_config,
        step_keys_to_execute=step_keys_to_execute,
    )
    tags = merge_dicts(external_pipeline.tags, execution_params.execution_metadata.tags)

    pipeline_run = graphene_info.context.instance.create_run(
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=external_execution_plan.execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        pipeline_name=execution_params.selector.pipeline_name,
        run_id=execution_params.execution_metadata.run_id
        if execution_params.execution_metadata.run_id
        else make_new_run_id(),
        solids_to_execute=frozenset(execution_params.selector.solid_selection)
        if execution_params.selector.solid_selection
        else None,
        run_config=execution_params.run_config,
        mode=execution_params.mode,
        step_keys_to_execute=step_keys_to_execute,
        tags=tags,
        root_run_id=execution_params.execution_metadata.root_run_id,
        parent_run_id=execution_params.execution_metadata.parent_run_id,
        status=PipelineRunStatus.NOT_STARTED,
        external_pipeline_origin=external_pipeline.get_external_origin(),
    )

    # TODO: support memoized execution from dagit. https://github.com/dagster-io/dagster/issues/3322
    if is_memoized_run(tags):
        graphene_info.context.instance.report_engine_event(
            'Tag "{tag}" was found when initializing pipeline run, however, memoized '
            "execution is only supported from the dagster CLI. This pipeline will run, but "
            "outputs from previous executions will be ignored. "
            "In order to execute this pipeline using memoization, provide the "
            '"{tag}" tag to the `dagster pipeline execute` CLI. The CLI is documented at '
            "the provided link.".format(tag=MEMOIZED_RUN_TAG),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.url(
                        "https://docs.dagster.io/_apidocs/cli#dagster-pipeline-execute",
                        label="dagster_pipeline_execute_docs_url",
                        description="In order to execute this pipeline using memoization, provide the "
                        '"{tag}" tag to the `dagster pipeline execute` CLI. The CLI is documented at '
                        "the provided link.".format(tag=MEMOIZED_RUN_TAG),
                    )
                ]
            ),
        )

    return pipeline_run
def test_default_launcher(
    ecs,
    instance,
    workspace,
    run,
    subnet,
    image,
    environment,
    task_long_arn_format,
):
    ecs.put_account_setting(name="taskLongArnFormat", value=task_long_arn_format)
    assert not run.tags

    initial_task_definitions = ecs.list_task_definitions()["taskDefinitionArns"]
    initial_tasks = ecs.list_tasks()["taskArns"]

    instance.launch_run(run.run_id, workspace)

    # A new task definition is created
    task_definitions = ecs.list_task_definitions()["taskDefinitionArns"]
    assert len(task_definitions) == len(initial_task_definitions) + 1
    task_definition_arn = list(set(task_definitions).difference(initial_task_definitions))[0]
    task_definition = ecs.describe_task_definition(taskDefinition=task_definition_arn)
    task_definition = task_definition["taskDefinition"]

    # It has a new family, name, and image
    assert task_definition["family"] == "dagster-run"
    assert len(task_definition["containerDefinitions"]) == 1
    container_definition = task_definition["containerDefinitions"][0]
    assert container_definition["name"] == "run"
    assert container_definition["image"] == image
    assert not container_definition.get("entryPoint")
    # But other stuff is inherited from the parent task definition
    assert container_definition["environment"] == environment

    # A new task is launched
    tasks = ecs.list_tasks()["taskArns"]
    assert len(tasks) == len(initial_tasks) + 1
    task_arn = list(set(tasks).difference(initial_tasks))[0]
    task = ecs.describe_tasks(tasks=[task_arn])["tasks"][0]
    assert subnet.id in str(task)
    assert task["taskDefinitionArn"] == task_definition["taskDefinitionArn"]

    # The run is tagged with info about the ECS task
    assert instance.get_run_by_id(run.run_id).tags["ecs/task_arn"] == task_arn
    cluster_arn = ecs._cluster_arn("default")
    assert instance.get_run_by_id(run.run_id).tags["ecs/cluster"] == cluster_arn

    # If we're using the new long ARN format,
    # the ECS task is tagged with info about the Dagster run
    if task_long_arn_format == "enabled":
        assert (
            ecs.list_tags_for_resource(resourceArn=task_arn)["tags"][0]["key"] == "dagster/run_id"
        )
        assert ecs.list_tags_for_resource(resourceArn=task_arn)["tags"][0]["value"] == run.run_id

    # We set pipeline-specific overrides
    overrides = task["overrides"]["containerOverrides"]
    assert len(overrides) == 1
    override = overrides[0]
    assert override["name"] == "run"
    assert "execute_run" in override["command"]
    assert run.run_id in str(override["command"])

    # And we log
    events = instance.event_log_storage.get_logs_for_run(run.run_id)
    latest_event = events[-1]
    assert latest_event.message == "[EcsRunLauncher] Launching run in ECS task"
    event_metadata = latest_event.dagster_event.engine_event_data.metadata_entries
    assert EventMetadataEntry.text(task_arn, "ECS Task ARN") in event_metadata
    assert EventMetadataEntry.text(cluster_arn, "ECS Cluster") in event_metadata
    assert EventMetadataEntry.text(run.run_id, "Run ID") in event_metadata