Exemplo n.º 1
0
    def _execute_step_docker(
        self,
        execute_step_args_packed,
        docker_config,
    ):
        """Run step execution in a Docker container."""
        execute_step_args = unpack_value(
            check.dict_param(
                execute_step_args_packed,
                "execute_step_args_packed",
            ))
        check.inst_param(execute_step_args, "execute_step_args",
                         ExecuteStepArgs)

        check.dict_param(docker_config, "docker_config")

        instance = DagsterInstance.from_ref(execute_step_args.instance_ref)
        pipeline_run = instance.get_run_by_id(
            execute_step_args.pipeline_run_id)
        check.inst(
            pipeline_run,
            PipelineRun,
            "Could not load run {}".format(execute_step_args.pipeline_run_id),
        )
        step_keys_str = ", ".join(execute_step_args.step_keys_to_execute)

        input_json = serialize_dagster_namedtuple(execute_step_args)

        command = "dagster api execute_step {}".format(json.dumps(input_json))

        docker_image = (docker_config["image"]
                        if docker_config.get("image") else execute_step_args.
                        pipeline_origin.repository_origin.container_image)

        if not docker_image:
            raise Exception(
                "No docker image specified by either the job or the repository"
            )

        client = docker.client.from_env()

        if docker_config.get("registry"):
            client.login(
                registry=docker_config["registry"]["url"],
                username=docker_config["registry"]["username"],
                password=docker_config["registry"]["password"],
            )

        # Post event for starting execution
        engine_event = instance.report_engine_event(
            "Executing steps {} in Docker container {}".format(
                step_keys_str, docker_image),
            pipeline_run,
            EngineEventData(
                [
                    MetadataEntry.text(step_keys_str, "Step keys"),
                    MetadataEntry.text(docker_image, "Image"),
                    MetadataEntry.text(self.request.hostname, "Celery worker"),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryDockerExecutor,
            step_key=execute_step_args.step_keys_to_execute[0],
        )

        serialized_events = [serialize_dagster_namedtuple(engine_event)]

        docker_env = {}
        if docker_config.get("env_vars"):
            docker_env = {
                env_name: os.getenv(env_name)
                for env_name in docker_config["env_vars"]
            }

        try:
            docker_response = client.containers.run(
                docker_image,
                command=command,
                detach=False,
                auto_remove=True,
                # pass through this worker's environment for things like AWS creds etc.
                environment=docker_env,
                network=docker_config.get("network", None),
            )

            res = docker_response.decode("utf-8")
        except docker.errors.ContainerError as err:
            instance.report_engine_event(
                "Failed to run steps {} in Docker container {}".format(
                    step_keys_str, docker_image),
                pipeline_run,
                EngineEventData([
                    MetadataEntry.text(docker_image, "Job image"),
                    MetadataEntry.text(err.stderr, "Docker stderr"),
                ], ),
                CeleryDockerExecutor,
                step_key=execute_step_args.step_keys_to_execute[0],
            )
            raise
        else:
            if res is None:
                raise Exception(
                    "No response from execute_step in CeleryDockerExecutor")

            serialized_events += [event for event in res.split("\n") if event]

        return serialized_events
Exemplo n.º 2
0
    def _execute_step_docker(
        _self,
        instance_ref_dict,
        step_keys,
        run_config,
        mode,
        repo_name,
        repo_location_name,
        run_id,
        docker_config,
    ):
        '''Run step execution in a Docker container.
        '''
        instance_ref = InstanceRef.from_dict(instance_ref_dict)
        instance = DagsterInstance.from_ref(instance_ref)
        pipeline_run = instance.get_run_by_id(run_id)
        check.invariant(pipeline_run, 'Could not load run {}'.format(run_id))

        step_keys_str = ", ".join(step_keys)

        variables = {
            'executionParams': {
                'runConfigData': run_config,
                'mode': mode,
                'selector': {
                    'repositoryLocationName': repo_location_name,
                    'repositoryName': repo_name,
                    'pipelineName': pipeline_run.pipeline_name,
                    'solidSelection': pipeline_run.solid_selection,
                },
                'executionMetadata': {
                    'runId': run_id
                },
                'stepKeys': step_keys,
            }
        }

        command = 'dagster-graphql -v \'{variables}\' -p executePlan'.format(
            variables=seven.json.dumps(variables))
        docker_image = docker_config['image']
        client = docker.client.from_env()

        if docker_config.get('registry'):
            client.login(
                registry=docker_config['registry']['url'],
                username=docker_config['registry']['username'],
                password=docker_config['registry']['password'],
            )

        # Post event for starting execution
        engine_event = instance.report_engine_event(
            'Executing steps {} in Docker container {}'.format(
                step_keys_str, docker_image),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_keys_str, 'Step keys'),
                    EventMetadataEntry.text(docker_image, 'Image'),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryDockerExecutor,
            step_key=step_keys[0],
        )

        events = [engine_event]

        docker_env = {}
        if docker_config.get('env_vars'):
            docker_env = {
                env_name: os.getenv(env_name)
                for env_name in docker_config['env_vars']
            }

        try:
            docker_response = client.containers.run(
                docker_image,
                command=command,
                detach=False,
                auto_remove=True,
                # pass through this worker's environment for things like AWS creds etc.
                environment=docker_env,
            )
            res = seven.json.loads(docker_response)

        except docker.errors.ContainerError as err:
            instance.report_engine_event(
                'Failed to run steps {} in Docker container {}'.format(
                    step_keys_str, docker_image),
                pipeline_run,
                EngineEventData([
                    EventMetadataEntry.text(docker_image, 'Job image'),
                    EventMetadataEntry.text(err.stderr, 'Docker stderr'),
                ], ),
                CeleryDockerExecutor,
                step_key=step_keys[0],
            )
            raise

        except JSONDecodeError:
            instance.report_engine_event(
                'Failed to parse response for steps {} from Docker container {}'
                .format(step_keys_str, docker_image),
                pipeline_run,
                EngineEventData([
                    EventMetadataEntry.text(docker_image, 'Job image'),
                    EventMetadataEntry.text(docker_response,
                                            'Docker Response'),
                ], ),
                CeleryDockerExecutor,
                step_key=step_keys[0],
            )
            raise

        else:
            handle_execution_errors(res, 'executePlan')
            step_events = handle_execute_plan_result(res)

        events += step_events

        serialized_events = [
            serialize_dagster_namedtuple(event) for event in events
        ]
        return serialized_events
Exemplo n.º 3
0
    def _execute_step_docker(
        self,
        instance_ref_dict,
        step_keys,
        run_config,
        mode,
        repo_name,
        run_id,
        docker_config,
        pipeline_origin_packed,
        retries_dict,
    ):
        """Run step execution in a Docker container.
        """
        check.dict_param(instance_ref_dict, "instance_ref_dict")
        check.list_param(step_keys, "step_keys", of_type=str)
        check.dict_param(run_config, "run_config")
        check.str_param(mode, "mode")
        check.str_param(repo_name, "repo_name")
        check.str_param(run_id, "run_id")
        check.dict_param(docker_config, "docker_config")
        pipeline_origin = unpack_value(
            check.dict_param(pipeline_origin_packed, "pipeline_origin_packed"))
        check.dict_param(retries_dict, "retries_dict")

        instance_ref = InstanceRef.from_dict(instance_ref_dict)
        instance = DagsterInstance.from_ref(instance_ref)
        pipeline_run = instance.get_run_by_id(run_id)
        check.invariant(pipeline_run, "Could not load run {}".format(run_id))

        step_keys_str = ", ".join(step_keys)

        input_json = serialize_dagster_namedtuple(
            ExecuteStepArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=run_id,
                instance_ref=instance_ref,
                mode=mode,
                step_keys_to_execute=step_keys,
                run_config=run_config,
                retries_dict=retries_dict,
            ))

        command = "dagster api execute_step_with_structured_logs {}".format(
            json.dumps(input_json))

        docker_image = docker_config["image"]

        client = docker.client.from_env()

        if docker_config.get("registry"):
            client.login(
                registry=docker_config["registry"]["url"],
                username=docker_config["registry"]["username"],
                password=docker_config["registry"]["password"],
            )

        # Post event for starting execution
        engine_event = instance.report_engine_event(
            "Executing steps {} in Docker container {}".format(
                step_keys_str, docker_image),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_keys_str, "Step keys"),
                    EventMetadataEntry.text(docker_image, "Image"),
                    EventMetadataEntry.text(self.request.hostname,
                                            "Celery worker"),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryDockerExecutor,
            step_key=step_keys[0],
        )

        serialized_events = [serialize_dagster_namedtuple(engine_event)]

        docker_env = {}
        if docker_config.get("env_vars"):
            docker_env = {
                env_name: os.getenv(env_name)
                for env_name in docker_config["env_vars"]
            }

        try:
            docker_response = client.containers.run(
                docker_image,
                command=command,
                detach=False,
                auto_remove=True,
                # pass through this worker's environment for things like AWS creds etc.
                environment=docker_env,
                network=docker_config.get("network", None),
            )

            res = docker_response.decode("utf-8")
        except docker.errors.ContainerError as err:
            instance.report_engine_event(
                "Failed to run steps {} in Docker container {}".format(
                    step_keys_str, docker_image),
                pipeline_run,
                EngineEventData([
                    EventMetadataEntry.text(docker_image, "Job image"),
                    EventMetadataEntry.text(err.stderr, "Docker stderr"),
                ], ),
                CeleryDockerExecutor,
                step_key=step_keys[0],
            )
            raise
        else:
            if res is None:
                raise Exception(
                    "No response from execute_step_with_structured_logs in CeleryDockerExecutor"
                )

            serialized_events += [event for event in res.split("\n") if event]

        return serialized_events
Exemplo n.º 4
0
    def _execute_step_docker(
        _self,
        instance_ref_dict,
        step_keys,
        run_config,
        mode,
        repo_name,
        repo_location_name,
        run_id,
        docker_config,
    ):
        """Run step execution in a Docker container.
        """
        instance_ref = InstanceRef.from_dict(instance_ref_dict)
        instance = DagsterInstance.from_ref(instance_ref)
        pipeline_run = instance.get_run_by_id(run_id)
        check.invariant(pipeline_run, "Could not load run {}".format(run_id))

        step_keys_str = ", ".join(step_keys)

        variables = {
            "executionParams": {
                "runConfigData": run_config,
                "mode": mode,
                "selector": {
                    "repositoryLocationName":
                    repo_location_name,
                    "repositoryName":
                    repo_name,
                    "pipelineName":
                    pipeline_run.pipeline_name,
                    "solidSelection":
                    list(pipeline_run.solids_to_execute)
                    if pipeline_run.solids_to_execute else None,
                },
                "executionMetadata": {
                    "runId": run_id
                },
                "stepKeys": step_keys,
            }
        }

        command = "dagster-graphql -v '{variables}' -p executePlan".format(
            variables=seven.json.dumps(variables))
        docker_image = docker_config["image"]
        client = docker.client.from_env()

        if docker_config.get("registry"):
            client.login(
                registry=docker_config["registry"]["url"],
                username=docker_config["registry"]["username"],
                password=docker_config["registry"]["password"],
            )

        # Post event for starting execution
        engine_event = instance.report_engine_event(
            "Executing steps {} in Docker container {}".format(
                step_keys_str, docker_image),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_keys_str, "Step keys"),
                    EventMetadataEntry.text(docker_image, "Image"),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryDockerExecutor,
            step_key=step_keys[0],
        )

        events = [engine_event]

        docker_env = {}
        if docker_config.get("env_vars"):
            docker_env = {
                env_name: os.getenv(env_name)
                for env_name in docker_config["env_vars"]
            }

        try:
            docker_response = client.containers.run(
                docker_image,
                command=command,
                detach=False,
                auto_remove=True,
                # pass through this worker's environment for things like AWS creds etc.
                environment=docker_env,
            )
            res = seven.json.loads(docker_response)

        except docker.errors.ContainerError as err:
            instance.report_engine_event(
                "Failed to run steps {} in Docker container {}".format(
                    step_keys_str, docker_image),
                pipeline_run,
                EngineEventData([
                    EventMetadataEntry.text(docker_image, "Job image"),
                    EventMetadataEntry.text(err.stderr, "Docker stderr"),
                ], ),
                CeleryDockerExecutor,
                step_key=step_keys[0],
            )
            raise

        except JSONDecodeError:
            instance.report_engine_event(
                "Failed to parse response for steps {} from Docker container {}"
                .format(step_keys_str, docker_image),
                pipeline_run,
                EngineEventData([
                    EventMetadataEntry.text(docker_image, "Job image"),
                    EventMetadataEntry.text(docker_response,
                                            "Docker Response"),
                ], ),
                CeleryDockerExecutor,
                step_key=step_keys[0],
            )
            raise

        else:
            handle_execution_errors(res, "executePlan")
            step_events = handle_execute_plan_result(res)

        events += step_events

        serialized_events = [
            serialize_dagster_namedtuple(event) for event in events
        ]
        return serialized_events
Exemplo n.º 5
0
    def _execute_step_docker(
        self,
        execute_step_args_packed,
        docker_config,
    ):
        """Run step execution in a Docker container."""
        execute_step_args = unpack_value(
            check.dict_param(
                execute_step_args_packed,
                "execute_step_args_packed",
            ))
        check.inst_param(execute_step_args, "execute_step_args",
                         ExecuteStepArgs)

        check.dict_param(docker_config, "docker_config")

        instance = DagsterInstance.from_ref(execute_step_args.instance_ref)
        pipeline_run = instance.get_run_by_id(
            execute_step_args.pipeline_run_id)
        check.inst(
            pipeline_run,
            PipelineRun,
            "Could not load run {}".format(execute_step_args.pipeline_run_id),
        )
        step_keys_str = ", ".join(execute_step_args.step_keys_to_execute)

        input_json = serialize_dagster_namedtuple(execute_step_args)

        command = "dagster api execute_step {}".format(json.dumps(input_json))

        docker_image = (docker_config["image"]
                        if docker_config.get("image") else execute_step_args.
                        pipeline_origin.repository_origin.container_image)

        if not docker_image:
            raise Exception(
                "No docker image specified by either the job or the repository"
            )

        client = docker.client.from_env()

        if docker_config.get("registry"):
            client.login(
                registry=docker_config["registry"]["url"],
                username=docker_config["registry"]["username"],
                password=docker_config["registry"]["password"],
            )

        # Post event for starting execution
        engine_event = instance.report_engine_event(
            "Executing steps {} in Docker container {}".format(
                step_keys_str, docker_image),
            pipeline_run,
            EngineEventData(
                [
                    MetadataEntry("Step keys", value=step_keys_str),
                    MetadataEntry("Image", value=docker_image),
                    MetadataEntry("Celery worker",
                                  value=self.request.hostname),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryDockerExecutor,
            step_key=execute_step_args.step_keys_to_execute[0],
        )

        serialized_events = [serialize_dagster_namedtuple(engine_event)]

        docker_env = {}
        if docker_config.get("env_vars"):
            docker_env = {
                env_name: os.getenv(env_name)
                for env_name in docker_config["env_vars"]
            }

        container_kwargs = check.opt_dict_param(
            docker_config.get("container_kwargs"),
            "container_kwargs",
            key_type=str)

        # set defaults for detach and auto_remove
        container_kwargs["detach"] = container_kwargs.get("detach", False)
        container_kwargs["auto_remove"] = container_kwargs.get(
            "auto_remove", True)

        # if environment variables are provided via container_kwargs, merge with env_vars
        if container_kwargs.get("environment") is not None:
            e_vars = container_kwargs.get("environment")
            if isinstance(e_vars, dict):
                docker_env.update(e_vars)
            else:
                for v in e_vars:
                    key, val = v.split("=")
                    docker_env[key] = val
            del container_kwargs["environment"]

        try:
            docker_response = client.containers.run(
                docker_image,
                command=command,
                # pass through this worker's environment for things like AWS creds etc.
                environment=docker_env,
                network=docker_config.get("network", None),
                **container_kwargs,
            )

            res = docker_response.decode("utf-8")
        except docker.errors.ContainerError as err:
            entries = [MetadataEntry("Job image", value=docker_image)]
            if err.stderr is not None:
                entries.append(MetadataEntry("Docker stderr",
                                             value=err.stderr))

            instance.report_engine_event(
                "Failed to run steps {} in Docker container {}".format(
                    step_keys_str, docker_image),
                pipeline_run,
                EngineEventData(entries),
                CeleryDockerExecutor,
                step_key=execute_step_args.step_keys_to_execute[0],
            )
            raise
        else:
            if res is None:
                raise Exception(
                    "No response from execute_step in CeleryDockerExecutor")

            serialized_events += [event for event in res.split("\n") if event]

        return serialized_events