Example #1
    def launch_step(self, step_handler_context: StepHandlerContext):
        events = []

        assert (
            len(step_handler_context.execute_step_args.step_keys_to_execute) == 1
        ), "Launching multiple steps is not currently supported"
        step_key = step_handler_context.execute_step_args.step_keys_to_execute[0]

        k8s_name_key = get_k8s_job_name(
            step_handler_context.execute_step_args.pipeline_run_id,
            step_key,
        )
        job_name = "dagster-job-%s" % (k8s_name_key)
        pod_name = "dagster-job-%s" % (k8s_name_key)

        input_json = serialize_dagster_namedtuple(step_handler_context.execute_step_args)
        args = ["dagster", "api", "execute_step", input_json]

        job_config = self._job_config
        if not job_config.job_image:
            job_config = job_config.with_image(
                step_handler_context.execute_step_args.pipeline_origin.repository_origin.container_image
            )

        if not job_config.job_image:
            raise Exception(
                "No image included in either executor config or the pipeline")

        user_defined_k8s_config = get_user_defined_k8s_config(
            frozentags(step_handler_context.step_tags[step_key]))

        job = construct_dagster_k8s_job(
            job_config=job_config,
            args=args,
            job_name=job_name,
            pod_name=pod_name,
            component="step_worker",
            user_defined_k8s_config=user_defined_k8s_config,
        )

        events.append(
            DagsterEvent(
                event_type_value=DagsterEventType.ENGINE_EVENT.value,
                pipeline_name=step_handler_context.execute_step_args.pipeline_origin.pipeline_name,
                step_key=step_key,
                message=f"Executing step {step_key} in Kubernetes job {job_name}",
                event_specific_data=EngineEventData(
                    [
                        EventMetadataEntry.text(step_key, "Step key"),
                        EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    ]
                ),
            )
        )

        self._batch_api.create_namespaced_job(body=job, namespace=self._job_namespace)

        return events
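
Note: `get_user_defined_k8s_config` pulls per-step Kubernetes overrides out of the step's tags. A minimal sketch of how a solid might supply them, assuming the documented "dagster-k8s/config" tag key (the exact schema accepted may vary by Dagster version):

from dagster import solid

@solid(
    tags={
        "dagster-k8s/config": {
            # Example values only; any container_config accepted by the
            # user-defined k8s config schema could go here.
            "container_config": {
                "resources": {
                    "requests": {"cpu": "250m", "memory": "64Mi"},
                    "limits": {"cpu": "500m", "memory": "2560Mi"},
                }
            }
        }
    }
)
def my_solid(context):
    ...
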
Example #2
    def launch_step(
        self, step_handler_context: StepHandlerContext
    ) -> List[DagsterEvent]:
        client = self._get_client()

        step_image = (
            step_handler_context.execute_step_args.pipeline_origin.repository_origin.container_image
        )

        if not step_image:
            step_image = self._image

        if not step_image:
            raise Exception(
                "No docker image specified by the executor config or repository"
            )

        validate_docker_image(step_image)

        try:
            step_container = self._create_step_container(
                client, step_image, step_handler_context.execute_step_args)
        except docker.errors.ImageNotFound:
            client.images.pull(step_image)
            step_container = self._create_step_container(
                client, step_image, step_handler_context.execute_step_args)

        if len(self._networks) > 1:
            for network_name in self._networks[1:]:
                network = client.networks.get(network_name)
                network.connect(step_container)

        assert (
            len(step_handler_context.execute_step_args.step_keys_to_execute) == 1
        ), "Launching multiple steps is not currently supported"
        step_key = step_handler_context.execute_step_args.step_keys_to_execute[0]

        events = [
            DagsterEvent(
                event_type_value=DagsterEventType.ENGINE_EVENT.value,
                pipeline_name=step_handler_context.execute_step_args.pipeline_origin.pipeline_name,
                step_key=step_key,
                message="Launching step in Docker container",
                event_specific_data=EngineEventData(
                    [
                        EventMetadataEntry.text(step_key, "Step key"),
                        EventMetadataEntry.text(step_container.id, "Docker container id"),
                    ]
                ),
            )
        ]

        step_container.start()

        return events
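
Note: `_create_step_container` is a private helper not shown in this excerpt. A hypothetical reconstruction using the docker-py API, assuming the same `dagster api execute_step` entry point as the Kubernetes handler above (the name and exact options are guesses):

import docker

from dagster.serdes import serialize_dagster_namedtuple

def _create_step_container(self, client, step_image, execute_step_args):
    # Hypothetical: the real helper may also set a container name,
    # environment variables, and the first configured network.
    input_json = serialize_dagster_namedtuple(execute_step_args)
    return client.containers.create(
        step_image,
        command=["dagster", "api", "execute_step", input_json],
    )
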
Example #3
    def launch_run(self, instance, run, external_pipeline):
        if isinstance(
            external_pipeline.get_external_origin().external_repository_origin.repository_location_origin,
            GrpcServerRepositoryLocationOrigin,
        ):
            repository_location_handle = (
                external_pipeline.repository_handle.repository_location_handle
            )

            if not isinstance(repository_location_handle, GrpcServerRepositoryLocationHandle):
                raise DagsterInvariantViolationError(
                    "Expected RepositoryLocationHandle to be of type "
                    "GrpcServerRepositoryLocationHandle but found type {}".format(
                        type(repository_location_handle)
                    )
                )

            repository_name = external_pipeline.repository_handle.repository_name
            location_name = repository_location_handle.location_name
            pipeline_origin = PipelinePythonOrigin(
                pipeline_name=external_pipeline.name,
                repository_origin=repository_location_handle.get_repository_python_origin(
                    repository_name
                ),
            )
        else:
            location_name = "local"
            pipeline_origin = external_pipeline.get_python_origin()

        input_json = serialize_dagster_namedtuple(
            ExecuteRunArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=run.run_id,
                instance_ref=None,
            ))

        app = self._get_app(location_name)
        sig = app.signature(
            "launch_run", args=(input_json,), queue=f"{location_name}-pipelines"
        )
        result = sig.delay()
        instance.report_engine_event(
            f"Started Celery task for pipeline (task id: {result.id}).",
            run,
            EngineEventData(metadata_entries=[
                EventMetadataEntry.text(result.id, "task_id"),
            ]),
        )

        return run
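
Note: `app.signature("launch_run", ...)` only builds a reference to a task by name; it assumes a Celery worker has a task registered as "launch_run" listening on the f"{location_name}-pipelines" queue. A hedged sketch of that worker-side counterpart (the task name comes from the code above; everything else is hypothetical):

from celery import Celery

app = Celery("dagster_pipelines")

@app.task(name="launch_run")
def launch_run_task(input_json):
    # Hypothetical body: deserialize the ExecuteRunArgs JSON payload and
    # execute the run, e.g. by shelling out to `dagster api execute_run`.
    ...
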
Example #4
    def launch_run(self, context: LaunchRunContext) -> None:
        """
        Launch a run in an ECS task.

        Currently, Fargate is the only supported launchType and awsvpc is the
        only supported networkMode. These are the defaults that are set up by
        docker-compose when you use the Dagster ECS reference deployment.
        """
        run = context.pipeline_run
        metadata = self._task_metadata()
        pipeline_origin = context.pipeline_code_origin
        image = pipeline_origin.repository_origin.container_image
        task_definition = self._task_definition(metadata, image)["family"]

        args = ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=self._instance.get_ref(),
        )
        command = args.get_command_args()

        # Set cpu or memory overrides
        # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html
        overrides = {}
        tags = self._get_run_tags(run.run_id)
        if tags.cpu:
            overrides["cpu"] = tags.cpu
        if tags.memory:
            overrides["memory"] = tags.memory

        # Run a task using the same network configuration as this process's
        # task.
        response = self.ecs.run_task(
            taskDefinition=task_definition,
            cluster=metadata.cluster,
            overrides={
                "containerOverrides": [{
                    "name": self.container_name,
                    "command": command
                }],
                **overrides,
            },
            networkConfiguration={
                "awsvpcConfiguration": {
                    "subnets": metadata.subnets,
                    "assignPublicIp": metadata.assign_public_ip,
                    "securityGroups": metadata.security_groups,
                }
            },
            launchType="FARGATE",
        )

        arn = response["tasks"][0]["taskArn"]
        self._set_run_tags(run.run_id, task_arn=arn)
        self._set_ecs_tags(run.run_id, task_arn=arn)
        self._instance.report_engine_event(
            message="Launching run in ECS task",
            pipeline_run=run,
            engine_event_data=EngineEventData([
                EventMetadataEntry.text(arn, "ECS Task ARN"),
                EventMetadataEntry.text(metadata.cluster, "ECS Cluster"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]),
            cls=self.__class__,
        )
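
Note: the cpu and memory overrides come from `self._get_run_tags(run.run_id)`. A sketch of how a pipeline might supply them, assuming the documented "ecs/cpu" and "ecs/memory" run tags are what that helper reads (values must be a Fargate-compatible cpu/memory pairing):

from dagster import pipeline

@pipeline(tags={"ecs/cpu": "256", "ecs/memory": "512"})
def my_pipeline():
    ...
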
Example #5
    def execute(
        self, plan_context: PlanOrchestrationContext, execution_plan: ExecutionPlan
    ):
        check.inst_param(plan_context, "plan_context", PlanOrchestrationContext)
        check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

        self._event_cursor = -1  # pylint: disable=attribute-defined-outside-init

        yield DagsterEvent.engine_event(
            plan_context,
            f"Starting execution with step handler {self._step_handler.name}",
            EngineEventData(),
        )

        with execution_plan.start(retry_mode=self.retries) as active_execution:
            running_steps: Dict[str, ExecutionStep] = {}

            if plan_context.resume_from_failure:
                yield DagsterEvent.engine_event(
                    plan_context,
                    "Resuming execution from failure",
                    EngineEventData(),
                )

                prior_events = self._pop_events(
                    plan_context.instance,
                    plan_context.run_id,
                )
                for dagster_event in prior_events:
                    yield dagster_event

                possibly_in_flight_steps = active_execution.rebuild_from_events(prior_events)
                for step in possibly_in_flight_steps:

                    yield DagsterEvent.engine_event(
                        plan_context,
                        "Checking on status of possibly launched steps",
                        EngineEventData(),
                        step.handle,
                    )

                    # TODO: check whether a failure event is included. For now, hacky
                    # assumption that we don't log anything on a successful check.
                    if self._step_handler.check_step_health(
                            self._get_step_handler_context(
                                plan_context, [step], active_execution)):
                        # health check failed, launch the step
                        self._log_new_events(
                            self._step_handler.launch_step(
                                self._get_step_handler_context(
                                    plan_context, [step], active_execution)),
                            plan_context,
                            {
                                step.key: step
                                for step in possibly_in_flight_steps
                            },
                        )

                    running_steps[step.key] = step

            last_check_step_health_time = pendulum.now("UTC")

            # Order of events is important here. During an iteration, we call
            # handle_event, then get_steps_to_execute, then is_complete.
            # get_steps_to_execute updates the state of ActiveExecution; without
            # it, is_complete can return True when we're just between steps.
            while not active_execution.is_complete:

                if active_execution.check_for_interrupts():
                    if not plan_context.instance.run_will_resume(
                            plan_context.run_id):
                        yield DagsterEvent.engine_event(
                            plan_context,
                            "Executor received termination signal, forwarding to steps",
                            EngineEventData.interrupted(
                                list(running_steps.keys())),
                        )
                        active_execution.mark_interrupted()
                        for _, step in running_steps.items():
                            self._log_new_events(
                                self._step_handler.terminate_step(
                                    self._get_step_handler_context(
                                        plan_context, [step],
                                        active_execution)),
                                plan_context,
                                running_steps,
                            )

                    else:
                        yield DagsterEvent.engine_event(
                            plan_context,
                            "Executor received termination signal, not forwarding to steps because "
                            "run will be resumed",
                            EngineEventData(metadata_entries=[
                                EventMetadataEntry.text(
                                    str(running_steps.keys()),
                                    "steps_in_flight")
                            ]),
                        )
                        active_execution.mark_interrupted()

                    return

                for dagster_event in self._pop_events(
                        plan_context.instance,
                        plan_context.run_id,
                ):  # type: ignore

                    # STEP_SKIPPED events are only emitted by ActiveExecution, which already handles
                    # and yields them.
                    if dagster_event.is_step_skipped:
                        assert isinstance(dagster_event.step_key, str)
                        active_execution.verify_complete(
                            plan_context, dagster_event.step_key)

                    else:
                        yield dagster_event
                        active_execution.handle_event(dagster_event)

                        if dagster_event.is_step_success or dagster_event.is_step_failure:
                            assert isinstance(dagster_event.step_key, str)
                            del running_steps[dagster_event.step_key]
                            active_execution.verify_complete(
                                plan_context, dagster_event.step_key)

                # process skips from failures or uncovered inputs
                for event in active_execution.plan_events_iterator(
                        plan_context):
                    yield event

                curr_time = pendulum.now("UTC")
                if (
                    curr_time - last_check_step_health_time
                ).total_seconds() >= self._check_step_health_interval_seconds:
                    last_check_step_health_time = curr_time
                    for _, step in running_steps.items():
                        self._log_new_events(
                            self._step_handler.check_step_health(
                                self._get_step_handler_context(
                                    plan_context, [step], active_execution)),
                            plan_context,
                            running_steps,
                        )

                for step in active_execution.get_steps_to_execute():
                    running_steps[step.key] = step
                    self._log_new_events(
                        self._step_handler.launch_step(
                            self._get_step_handler_context(
                                plan_context, [step], active_execution)),
                        plan_context,
                        running_steps,
                    )

                time.sleep(self._sleep_seconds)
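
Note: this loop is written against a pluggable step handler. A sketch of the interface it assumes, inferred from the calls above (`name`, `launch_step`, `check_step_health`, `terminate_step`); the real base class may declare additional members:

from abc import ABC, abstractmethod
from typing import List

from dagster import DagsterEvent

class StepHandler(ABC):
    # Inferred interface: each method returns DagsterEvents for the executor
    # to log. In the resume-from-failure path above, a non-empty return from
    # check_step_health is treated as a failed health check.
    @property
    @abstractmethod
    def name(self) -> str:
        ...

    @abstractmethod
    def launch_step(self, step_handler_context) -> List[DagsterEvent]:
        ...

    @abstractmethod
    def check_step_health(self, step_handler_context) -> List[DagsterEvent]:
        ...

    @abstractmethod
    def terminate_step(self, step_handler_context) -> List[DagsterEvent]:
        ...
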
Example #6
def create_valid_pipeline_run(graphene_info, external_pipeline,
                              execution_params):
    ensure_valid_config(external_pipeline, execution_params.mode,
                        execution_params.run_config)

    step_keys_to_execute = compute_step_keys_to_execute(
        graphene_info, external_pipeline, execution_params)

    external_execution_plan = get_external_execution_plan_or_raise(
        graphene_info=graphene_info,
        external_pipeline=external_pipeline,
        mode=execution_params.mode,
        run_config=execution_params.run_config,
        step_keys_to_execute=step_keys_to_execute,
    )
    tags = merge_dicts(external_pipeline.tags,
                       execution_params.execution_metadata.tags)

    pipeline_run = graphene_info.context.instance.create_run(
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=external_execution_plan.execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        pipeline_name=execution_params.selector.pipeline_name,
        run_id=execution_params.execution_metadata.run_id
        if execution_params.execution_metadata.run_id
        else make_new_run_id(),
        solids_to_execute=frozenset(execution_params.selector.solid_selection)
        if execution_params.selector.solid_selection
        else None,
        run_config=execution_params.run_config,
        mode=execution_params.mode,
        step_keys_to_execute=step_keys_to_execute,
        tags=tags,
        root_run_id=execution_params.execution_metadata.root_run_id,
        parent_run_id=execution_params.execution_metadata.parent_run_id,
        status=PipelineRunStatus.NOT_STARTED,
        external_pipeline_origin=external_pipeline.get_external_origin(),
    )

    # TODO: support memoized execution from dagit. https://github.com/dagster-io/dagster/issues/3322
    if is_memoized_run(tags):
        graphene_info.context.instance.report_engine_event(
            'Tag "{tag}" was found when initializing pipeline run, however, memoized '
            "execution is only supported from the dagster CLI. This pipeline will run, but "
            "outputs from previous executions will be ignored. "
            "In order to execute this pipeline using memoization, provide the "
            '"{tag}" tag to the `dagster pipeline execute` CLI. The CLI is documented at '
            "the provided link.".format(tag=MEMOIZED_RUN_TAG),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.url(
                        "https://docs.dagster.io/_apidocs/cli#dagster-pipeline-execute",
                        label="dagster_pipeline_execute_docs_url",
                        description="In order to execute this pipeline using memoization, provide the "
                        '"{tag}" tag to the `dagster pipeline execute` CLI. The CLI is documented at '
                        "the provided link.".format(tag=MEMOIZED_RUN_TAG),
                    )
                ]
            ),
        )

    return pipeline_run
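
Note: `is_memoized_run` checks the merged tags for the memoization marker. A sketch of what that check presumably looks like, assuming `MEMOIZED_RUN_TAG` is the "dagster/is_memoized_run" tag key from `dagster.core.storage.tags`:

from dagster.core.storage.tags import MEMOIZED_RUN_TAG

def is_memoized_run(tags):
    # Presumed implementation: a run is memoized when the tag is set to "true".
    return tags is not None and tags.get(MEMOIZED_RUN_TAG) == "true"
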
Example #7
def test_default_launcher(
    ecs,
    instance,
    workspace,
    run,
    subnet,
    image,
    environment,
    task_long_arn_format,
):
    ecs.put_account_setting(name="taskLongArnFormat",
                            value=task_long_arn_format)
    assert not run.tags

    initial_task_definitions = ecs.list_task_definitions()["taskDefinitionArns"]
    initial_tasks = ecs.list_tasks()["taskArns"]

    instance.launch_run(run.run_id, workspace)

    # A new task definition is created
    task_definitions = ecs.list_task_definitions()["taskDefinitionArns"]
    assert len(task_definitions) == len(initial_task_definitions) + 1
    task_definition_arn = list(set(task_definitions).difference(initial_task_definitions))[0]
    task_definition = ecs.describe_task_definition(
        taskDefinition=task_definition_arn)
    task_definition = task_definition["taskDefinition"]

    # It has a new family, name, and image
    assert task_definition["family"] == "dagster-run"
    assert len(task_definition["containerDefinitions"]) == 1
    container_definition = task_definition["containerDefinitions"][0]
    assert container_definition["name"] == "run"
    assert container_definition["image"] == image
    assert not container_definition.get("entryPoint")
    # But other stuff is inherited from the parent task definition
    assert container_definition["environment"] == environment

    # A new task is launched
    tasks = ecs.list_tasks()["taskArns"]
    assert len(tasks) == len(initial_tasks) + 1
    task_arn = list(set(tasks).difference(initial_tasks))[0]
    task = ecs.describe_tasks(tasks=[task_arn])["tasks"][0]
    assert subnet.id in str(task)
    assert task["taskDefinitionArn"] == task_definition["taskDefinitionArn"]

    # The run is tagged with info about the ECS task
    assert instance.get_run_by_id(run.run_id).tags["ecs/task_arn"] == task_arn
    cluster_arn = ecs._cluster_arn("default")
    assert instance.get_run_by_id(run.run_id).tags["ecs/cluster"] == cluster_arn

    # If we're using the new long ARN format,
    # the ECS task is tagged with info about the Dagster run
    if task_long_arn_format == "enabled":
        task_tags = ecs.list_tags_for_resource(resourceArn=task_arn)["tags"]
        assert task_tags[0]["key"] == "dagster/run_id"
        assert task_tags[0]["value"] == run.run_id

    # We set pipeline-specific overrides
    overrides = task["overrides"]["containerOverrides"]
    assert len(overrides) == 1
    override = overrides[0]
    assert override["name"] == "run"
    assert "execute_run" in override["command"]
    assert run.run_id in str(override["command"])

    # And we log
    events = instance.event_log_storage.get_logs_for_run(run.run_id)
    latest_event = events[-1]
    assert latest_event.message == "[EcsRunLauncher] Launching run in ECS task"
    event_metadata = latest_event.dagster_event.engine_event_data.metadata_entries
    assert EventMetadataEntry.text(task_arn, "ECS Task ARN") in event_metadata
    assert EventMetadataEntry.text(cluster_arn, "ECS Cluster") in event_metadata
    assert EventMetadataEntry.text(run.run_id, "Run ID") in event_metadata