Example #1
def multiprocess_executor(init_context):
    """The default multiprocess executor.

    This simple multiprocess executor is available by default on any :py:class:`ModeDefinition`
    that does not provide custom executors. To select the multiprocess executor, include a fragment
    such as the following in your config:

    .. code-block:: yaml

        execution:
          multiprocess:
            config:
              max_concurrent: 4

    The ``max_concurrent`` arg is optional and tells the execution engine how many processes may run
    concurrently. By default, or if you set ``max_concurrent`` to 0, this is the return value of
    :py:func:`python:multiprocessing.cpu_count`.

    Execution priority can be configured using the ``dagster/priority`` tag via solid metadata,
    where the higher the number, the higher the priority. 0 is the default, and both positive
    and negative numbers can be used.
    """
    from dagster.core.executor.init import InitExecutorContext
    from dagster.core.executor.multiprocess import MultiprocessExecutor

    check.inst_param(init_context, "init_context", InitExecutorContext)

    check_cross_process_constraints(init_context)

    return MultiprocessExecutor(
        pipeline=init_context.pipeline,
        max_concurrent=init_context.executor_config["max_concurrent"],
        retries=RetryMode.from_config(init_context.executor_config["retries"]),
    )
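
Putting the docstring's pieces together, a minimal illustrative sketch of selecting this executor at runtime (``noop`` and ``my_pipeline`` are assumed names, not from the source; multiprocess execution requires a reconstructable pipeline and a persistent instance, so ``DAGSTER_HOME`` must be set):

from dagster import DagsterInstance, execute_pipeline, pipeline, reconstructable, solid

@solid(tags={"dagster/priority": "3"})  # higher number -> higher execution priority
def noop(_):
    return 1

@pipeline
def my_pipeline():
    noop()

if __name__ == "__main__":
    result = execute_pipeline(
        reconstructable(my_pipeline),
        run_config={"execution": {"multiprocess": {"config": {"max_concurrent": 4}}}},
        instance=DagsterInstance.get(),  # reads DAGSTER_HOME
    )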
Example #2
    def __init__(
        self,
        instance_config_map,
        dagster_home,
        postgres_password_secret,
        load_incluster_config=True,
        kubeconfig_file=None,
        broker=None,
        backend=None,
        include=None,
        config_source=None,
        retries=None,
        inst_data=None,
        k8s_client_batch_api=None,
        env_config_maps=None,
        env_secrets=None,
        volume_mounts=None,
        volumes=None,
    ):
        self._inst_data = check.opt_inst_param(inst_data, "inst_data",
                                               ConfigurableClassData)

        if load_incluster_config:
            check.invariant(
                kubeconfig_file is None,
                "`kubeconfig_file` is set but `load_incluster_config` is True.",
            )
            kubernetes.config.load_incluster_config()
        else:
            check.opt_str_param(kubeconfig_file, "kubeconfig_file")
            kubernetes.config.load_kube_config(kubeconfig_file)

        self._fixed_batch_api = k8s_client_batch_api

        self.instance_config_map = check.str_param(instance_config_map,
                                                   "instance_config_map")
        self.dagster_home = check.str_param(dagster_home, "dagster_home")
        self.postgres_password_secret = check.str_param(
            postgres_password_secret, "postgres_password_secret")
        self.broker = check.opt_str_param(broker, "broker")
        self.backend = check.opt_str_param(backend, "backend")
        self.include = check.opt_list_param(include, "include")
        self.config_source = check.opt_dict_param(config_source,
                                                  "config_source")

        retries = check.opt_dict_param(retries, "retries") or {"enabled": {}}
        self.retries = RetryMode.from_config(retries)

        self._env_config_maps = check.opt_list_param(env_config_maps,
                                                     "env_config_maps",
                                                     of_type=str)
        self._env_secrets = check.opt_list_param(env_secrets,
                                                 "env_secrets",
                                                 of_type=str)

        self._volume_mounts = check.opt_list_param(volume_mounts,
                                                   "volume_mounts")
        self._volumes = check.opt_list_param(volumes, "volumes")

        super().__init__()
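
The enclosing class is omitted from this snippet; judging by the parameters (Celery's ``broker``/``backend``/``include`` alongside Kubernetes settings), it is the constructor of a launcher along the lines of ``CeleryK8sRunLauncher``. A hypothetical instantiation sketch, with illustrative values:

# Hypothetical: class name inferred from the constructor parameters above.
launcher = CeleryK8sRunLauncher(
    instance_config_map="dagster-instance",
    dagster_home="/opt/dagster/dagster_home",
    postgres_password_secret="dagster-postgresql-secret",
    load_incluster_config=False,       # outside a cluster...
    kubeconfig_file="~/.kube/config",  # ...load config from a kubeconfig instead
    broker="pyamqp://guest@localhost//",
)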
Example #3
def docker_executor(init_context: InitExecutorContext) -> Executor:
    from . import DockerRunLauncher

    image = init_context.executor_config.get("image")
    registry = init_context.executor_config.get("registry")
    env_vars = init_context.executor_config.get("env_vars")
    network = init_context.executor_config.get("network")
    networks = init_context.executor_config.get("networks")
    container_kwargs = init_context.executor_config.get("container_kwargs")

    run_launcher = init_context.instance.run_launcher
    if isinstance(run_launcher, DockerRunLauncher):
        image = image or run_launcher.image
        registry = registry or run_launcher.registry
        env_vars = run_launcher.env_vars + (env_vars or [])
        networks = run_launcher.networks + (networks or [])
        container_kwargs = merge_dicts(run_launcher.container_kwargs, container_kwargs or {})

    validate_docker_config(network, networks, container_kwargs)

    return StepDelegatingExecutor(
        DockerStepHandler(
            image,
            registry,
            env_vars,
            network,
            networks,
            container_kwargs,
        ),
        retries=RetryMode.from_config(init_context.executor_config["retries"]),
    )
Example #4
def in_process_executor(init_context):
    """The default in-process executor.

    In most Dagster environments, this will be the default executor. It is available by default on
    any :py:class:`ModeDefinition` that does not provide custom executors. To select it explicitly,
    include the following top-level fragment in config:

    .. code-block:: yaml

        execution:
          in_process:

    Execution priority can be configured using the ``dagster/priority`` tag via solid metadata,
    where the higher the number, the higher the priority. 0 is the default, and both positive
    and negative numbers can be used.
    """
    from dagster.core.executor.init import InitExecutorContext
    from dagster.core.executor.in_process import InProcessExecutor

    check.inst_param(init_context, "init_context", InitExecutorContext)

    return InProcessExecutor(
        # shouldn't need to .get() here - issue with defaults in config setup
        retries=RetryMode.from_config(
            init_context.executor_config.get("retries", {"enabled": {}})),
        marker_to_close=init_context.executor_config.get("marker_to_close"),
    )
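
A minimal sketch of selecting this executor explicitly (it is already the default, so the run config below is optional; ``noop`` and ``my_pipeline`` are illustrative names):

from dagster import execute_pipeline, pipeline, solid

@solid
def noop(_):
    return 1

@pipeline
def my_pipeline():
    noop()

# Equivalent to the YAML fragment in the docstring above.
result = execute_pipeline(my_pipeline, run_config={"execution": {"in_process": {}}})
assert result.success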
Example #5
    def needs_config(_):
        from dagster.core.executor.in_process import InProcessExecutor

        return InProcessExecutor(
            retries=RetryMode.from_config({"enabled": {}}),
            marker_to_close=None,
        )
Example #6
def _core_multiprocess_executor_creation(max_concurrent, retries_config):
    from dagster.core.executor.multiprocess import MultiprocessExecutor

    return MultiprocessExecutor(
        max_concurrent=max_concurrent,
        retries=RetryMode.from_config(retries_config),
    )
Example #7
def celery_executor(init_context):
    """Celery-based executor.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings" introduced
    in Celery version 4.0 and the object constructed from config will be passed to the
    :py:class:`celery.Celery` constructor as its ``config_source`` argument.
    (See https://docs.celeryproject.org/en/latest/userguide/configuration.html for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments to the
    :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend`` (e.g., to use
    Redis instead of RabbitMQ). We expect that ``config_source`` will be less frequently
    modified, but that when solid executions are especially fast or slow, or when there are
    different requirements around idempotence or retry, it may make sense to execute pipelines
    with variations on these settings.

    If you'd like to configure a celery executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs`` defined on a
    :py:class:`~dagster.ModeDefinition` as follows:

    .. code-block:: python

        from dagster import ModeDefinition, default_executors, pipeline
        from dagster_celery import celery_executor

        @pipeline(mode_defs=[ModeDefinition(executor_defs=default_executors + [celery_executor])])
        def celery_enabled_pipeline():
            pass

    Then you can configure the executor as follows:

    .. code-block:: YAML

        execution:
          celery:
            config:
              broker: 'pyamqp://guest@localhost//'  # Optional[str]: The URL of the Celery broker
              backend: 'rpc://' # Optional[str]: The URL of the Celery results backend
              include: ['my_module'] # Optional[List[str]]: Modules every worker should import
              config_source: # Dict[str, Any]: Any additional parameters to pass to the
                  #...       # Celery workers. This dict will be passed as the `config_source`
                  #...       # argument of celery.Celery().

    Note that the YAML you provide here must align with the configuration used to start the
    Celery workers you intend to run on. If, for example, you point the executor at a different
    broker than the one your workers are listening to, the workers will never be able to pick up
    tasks for execution.
    """
    check_cross_process_constraints(init_context)

    return CeleryExecutor(
        broker=init_context.executor_config.get("broker"),
        backend=init_context.executor_config.get("backend"),
        config_source=init_context.executor_config.get("config_source"),
        include=init_context.executor_config.get("include"),
        retries=RetryMode.from_config(init_context.executor_config["retries"]),
    )
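
For reference, the YAML fragment from the docstring expressed as a Python ``run_config`` dict (values illustrative):

run_config = {
    "execution": {
        "celery": {
            "config": {
                "broker": "pyamqp://guest@localhost//",
                "backend": "rpc://",
                "include": ["my_module"],
            }
        }
    }
}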
Example #8
def for_cli(broker=None, backend=None, include=None, config_source=None):
    return CeleryExecutor(
        retries=RetryMode(RetryMode.DISABLED),
        broker=broker,
        backend=backend,
        include=include,
        config_source=config_source,
    )
Example #9
def _core_in_process_executor_creation(retries_config, marker_to_close):
    from dagster.core.executor.in_process import InProcessExecutor

    return InProcessExecutor(
        # shouldn't need to .get() here - issue with defaults in config setup
        retries=RetryMode.from_config(retries_config),
        marker_to_close=marker_to_close,
    )
Example #10
def _core_in_process_executor_creation(config: Dict[str, Any]):
    from dagster.core.executor.in_process import InProcessExecutor

    return InProcessExecutor(
        # shouldn't need to .get() here - issue with defaults in config setup
        retries=RetryMode.from_config(config["retries"]),
        marker_to_close=config.get("marker_to_close"),
    )
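
A sketch of the config shape this helper consumes, with key names taken from the code above (``marker_to_close`` is read with ``.get`` and may be omitted):

executor = _core_in_process_executor_creation({"retries": {"enabled": {}}})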
Example #11
def docker_executor(init_context: InitExecutorContext) -> Executor:
    """
    Executor which launches steps as Docker containers.

    To use the `docker_executor`, set it as the `executor_def` when defining a job:

    .. literalinclude:: ../../../../../../python_modules/libraries/dagster-docker/dagster_docker_tests/test_example_executor.py
       :start-after: start_marker
       :end-before: end_marker
       :language: python

    Then you can configure the executor with run config as follows:

    .. code-block:: YAML

        execution:
          config:
            registry: ...
            network: ...
            networks: ...
            container_kwargs: ...

    If you're using the DockerRunLauncher, configuration set on the containers created by the run
    launcher will also be set on the containers that are created for each step.
    """
    from . import DockerRunLauncher

    image = init_context.executor_config.get("image")
    registry = init_context.executor_config.get("registry")
    env_vars = init_context.executor_config.get("env_vars")
    network = init_context.executor_config.get("network")
    networks = init_context.executor_config.get("networks")
    container_kwargs = init_context.executor_config.get("container_kwargs")

    run_launcher = init_context.instance.run_launcher
    if isinstance(run_launcher, DockerRunLauncher):
        image = image or run_launcher.image
        registry = registry or run_launcher.registry
        env_vars = run_launcher.env_vars + (env_vars or [])
        networks = run_launcher.networks + (networks or [])
        container_kwargs = merge_dicts(run_launcher.container_kwargs,
                                       container_kwargs or {})

    validate_docker_config(network, networks, container_kwargs)

    return StepDelegatingExecutor(
        DockerStepHandler(
            image,
            registry,
            env_vars,
            network,
            networks,
            container_kwargs,
        ),
        retries=RetryMode.from_config(init_context.executor_config["retries"]),
    )
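
A minimal sketch of attaching this executor to a job, along the lines of what the ``literalinclude`` above points at (op and job names are illustrative):

from dagster import job, op
from dagster_docker import docker_executor

@op
def my_op():
    return 1

@job(executor_def=docker_executor)
def docker_job():
    my_op()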
Example #12
    def test_executor(init_context):
        from dagster.core.executor.in_process import InProcessExecutor

        assert init_context.executor_config == "secret testing value!!"

        return InProcessExecutor(
            # shouldn't need to .get() here - issue with defaults in config setup
            retries=RetryMode.from_config({"enabled": {}}),
            marker_to_close=None,
        )
Example #13
def _core_multiprocess_executor_creation(config: Dict[str, Any]):
    from dagster.core.executor.multiprocess import MultiprocessExecutor

    # unpack optional selector
    start_method = None
    start_cfg = {}
    start_selector = config.get("start_method")
    if start_selector:
        start_method, start_cfg = list(start_selector.items())[0]

    return MultiprocessExecutor(
        max_concurrent=config["max_concurrent"],
        retries=RetryMode.from_config(config["retries"]),
        start_method=start_method,
        explicit_forkserver_preload=start_cfg.get("preload_modules"),
    )
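
A sketch of the config this function unpacks: ``start_method`` is an optional one-key selector whose key becomes the start method and whose value may carry ``preload_modules`` (meaningful for ``forkserver``). Values below are illustrative:

config = {
    "max_concurrent": 4,
    "retries": {"enabled": {}},
    "start_method": {"forkserver": {"preload_modules": ["my_heavy_module"]}},
}
executor = _core_multiprocess_executor_creation(config)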
Example #14
def celery_k8s_job_executor(init_context):
    """Celery-based executor which launches tasks as Kubernetes Jobs.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings" introduced
    in Celery version 4.0 and the object constructed from config will be passed to the
    :py:class:`celery.Celery` constructor as its ``config_source`` argument.
    (See https://docs.celeryproject.org/en/latest/userguide/configuration.html for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments to the
    :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend`` (e.g., to use
    Redis instead of RabbitMQ). We expect that ``config_source`` will be less frequently
    modified, but that when solid executions are especially fast or slow, or when there are
    different requirements around idempotence or retry, it may make sense to execute pipelines
    with variations on these settings.

    If you'd like to configure a Celery Kubernetes Job executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs`` defined on a
    :py:class:`~dagster.ModeDefinition` as follows:

    .. literalinclude:: ../../../../../../python_modules/libraries/dagster-celery-k8s/dagster_celery_k8s_tests/example_celery_mode_def.py
       :language: python

    Then you can configure the executor as follows:

    .. code-block:: YAML

        execution:
          celery-k8s:
            config:
              job_image: 'my_repo.com/image_name:latest'
              job_namespace: 'some-namespace'
              broker: 'pyamqp://guest@localhost//'  # Optional[str]: The URL of the Celery broker
              backend: 'rpc://' # Optional[str]: The URL of the Celery results backend
              include: ['my_module'] # Optional[List[str]]: Modules every worker should import
              config_source: # Dict[str, Any]: Any additional parameters to pass to the
                  #...       # Celery workers. This dict will be passed as the `config_source`
                  #...       # argument of celery.Celery().

    Note that the YAML you provide here must align with the configuration used to start the
    Celery workers you intend to run on. If, for example, you point the executor at a different
    broker than the one your workers are listening to, the workers will never be able to pick up
    tasks for execution.

    In deployments where the ``celery_k8s_job_executor`` is used, all appropriate celery and
    dagster_celery commands must be invoked with the ``-A dagster_celery_k8s.app`` argument.
    """

    run_launcher = init_context.instance.run_launcher
    exc_cfg = init_context.executor_config

    if not isinstance(run_launcher, CeleryK8sRunLauncher):
        raise DagsterUnmetExecutorRequirementsError(
            "This engine is only compatible with a CeleryK8sRunLauncher; configure the "
            "CeleryK8sRunLauncher on your instance to use it.", )

    job_config = DagsterK8sJobConfig(
        dagster_home=run_launcher.dagster_home,
        instance_config_map=run_launcher.instance_config_map,
        postgres_password_secret=run_launcher.postgres_password_secret,
        job_image=exc_cfg.get("job_image")
        or os.getenv("DAGSTER_CURRENT_IMAGE"),
        image_pull_policy=exc_cfg.get("image_pull_policy"),
        image_pull_secrets=exc_cfg.get("image_pull_secrets"),
        service_account_name=exc_cfg.get("service_account_name"),
        env_config_maps=exc_cfg.get("env_config_maps"),
        env_secrets=exc_cfg.get("env_secrets"),
    )

    # Set on the instance but overrideable here
    broker = run_launcher.broker or exc_cfg.get("broker")
    backend = run_launcher.backend or exc_cfg.get("backend")
    config_source = run_launcher.config_source or exc_cfg.get("config_source")
    include = run_launcher.include or exc_cfg.get("include")
    retries = run_launcher.retries or RetryMode.from_config(
        exc_cfg.get("retries"))

    return CeleryK8sJobExecutor(
        broker=broker,
        backend=backend,
        config_source=config_source,
        include=include,
        retries=retries,
        job_config=job_config,
        job_namespace=exc_cfg.get("job_namespace"),
        load_incluster_config=exc_cfg.get("load_incluster_config"),
        kubeconfig_file=exc_cfg.get("kubeconfig_file"),
        repo_location_name=exc_cfg.get("repo_location_name"),
    )
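
A minimal sketch of the kind of mode definition the ``literalinclude`` above points at (pipeline name is illustrative):

from dagster import ModeDefinition, default_executors, pipeline
from dagster_celery_k8s import celery_k8s_job_executor

celery_k8s_mode = ModeDefinition(
    executor_defs=default_executors + [celery_k8s_job_executor]
)

@pipeline(mode_defs=[celery_k8s_mode])
def celery_k8s_pipeline():
    pass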
Example #15
def celery_docker_executor(init_context):
    """Celery-based executor which launches tasks in docker containers.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings" introduced
    in Celery version 4.0 and the object constructed from config will be passed to the
    :py:class:`celery.Celery` constructor as its ``config_source`` argument.
    (See https://docs.celeryproject.org/en/latest/userguide/configuration.html for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments to the
    :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend`` (e.g., to use
    Redis instead of RabbitMQ). We expect that ``config_source`` will be less frequently
    modified, but that when op executions are especially fast or slow, or when there are
    different requirements around idempotence or retry, it may make sense to execute jobs
    with variations on these settings.

    To use the `celery_docker_executor`, set it as the `executor_def` when defining a job:

    .. code-block:: python

        from dagster import job
        from dagster_celery_docker.executor import celery_executor

        @job(executor_def=celery_docker_executor)
        def celery_enabled_job():
            pass

    Then you can configure the executor as follows:

    .. code-block:: YAML

        execution:
          config:
            docker:
              image: 'my_repo.com/image_name:latest'
              registry:
                url: 'my_repo.com'
                username: '******'
                password: {env: 'DOCKER_PASSWORD'}
              env_vars: ["DAGSTER_HOME"] # environment vars to pass from celery worker to docker
            broker: 'pyamqp://guest@localhost//'  # Optional[str]: The URL of the Celery broker
            backend: 'rpc://' # Optional[str]: The URL of the Celery results backend
            include: ['my_module'] # Optional[List[str]]: Modules every worker should import
            config_source: # Dict[str, Any]: Any additional parameters to pass to the
                #...       # Celery workers. This dict will be passed as the `config_source`
                #...       # argument of celery.Celery().

    Note that the YAML you provide here must align with the configuration used to start the
    Celery workers you intend to run on. If, for example, you point the executor at a different
    broker than the one your workers are listening to, the workers will never be able to pick up
    tasks for execution.

    In deployments where the ``celery_docker_executor`` is used, all appropriate celery and
    dagster_celery commands must be invoked with the ``-A dagster_celery_docker.app`` argument.
    """

    exc_cfg = init_context.executor_config

    return CeleryDockerExecutor(
        broker=exc_cfg.get("broker"),
        backend=exc_cfg.get("backend"),
        config_source=exc_cfg.get("config_source"),
        include=exc_cfg.get("include"),
        retries=RetryMode.from_config(exc_cfg.get("retries")),
        docker_config=exc_cfg.get("docker"),
    )
Example #16
def k8s_job_executor(init_context: InitExecutorContext) -> Executor:
    """
    Executor which launches steps as Kubernetes Jobs.

    To use the `k8s_job_executor`, set it as the `executor_def` when defining a job:

    .. literalinclude:: ../../../../../../python_modules/libraries/dagster-k8s/dagster_k8s_tests/unit_tests/test_example_executor_mode_def.py
       :start-after: start_marker
       :end-before: end_marker
       :language: python

    Then you can configure the executor with run config as follows:

    .. code-block:: YAML

        execution:
          config:
            job_namespace: 'some-namespace'
            image_pull_policy: ...
            image_pull_secrets: ...
            service_account_name: ...
            env_config_maps: ...
            env_secrets: ...
            job_image: ... # leave out if using userDeployments
    """

    run_launcher = init_context.instance.run_launcher
    if not isinstance(run_launcher, K8sRunLauncher):
        raise DagsterUnmetExecutorRequirementsError(
            "This engine is only compatible with a K8sRunLauncher; configure the "
            "K8sRunLauncher on your instance to use it.", )

    exc_cfg = init_context.executor_config
    job_config = DagsterK8sJobConfig(
        dagster_home=run_launcher.dagster_home,
        instance_config_map=run_launcher.instance_config_map,
        postgres_password_secret=run_launcher.postgres_password_secret,
        job_image=exc_cfg.get("job_image"),
        image_pull_policy=(
            exc_cfg.get("image_pull_policy")
            if exc_cfg.get("image_pull_policy") is not None
            else run_launcher.image_pull_policy
        ),
        image_pull_secrets=run_launcher.image_pull_secrets
        + (exc_cfg.get("image_pull_secrets") or []),
        service_account_name=(
            exc_cfg.get("service_account_name")
            if exc_cfg.get("service_account_name") is not None
            else run_launcher.service_account_name
        ),
        env_config_maps=run_launcher.env_config_maps + (exc_cfg.get("env_config_maps") or []),
        env_secrets=run_launcher.env_secrets + (exc_cfg.get("env_secrets") or []),
        volume_mounts=run_launcher.volume_mounts + (exc_cfg.get("volume_mounts") or []),
        volumes=run_launcher.volumes + (exc_cfg.get("volumes") or []),
    )

    return StepDelegatingExecutor(
        K8sStepHandler(
            job_config=job_config,
            job_namespace=(
                exc_cfg.get("job_namespace")
                if exc_cfg.get("job_namespace") is not None
                else run_launcher.job_namespace
            ),
            load_incluster_config=run_launcher.load_incluster_config,
            kubeconfig_file=run_launcher.kubeconfig_file,
        ),
        retries=RetryMode.from_config(init_context.executor_config["retries"]),
        should_verify_step=True,
    )
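
A minimal sketch of attaching this executor to a job, along the lines of the ``literalinclude`` above (names are illustrative):

from dagster import job, op
from dagster_k8s import k8s_job_executor

@op
def my_op():
    return 1

@job(executor_def=k8s_job_executor)
def k8s_job():
    my_op()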
Example #17
def k8s_job_executor(init_context: InitExecutorContext) -> Executor:
    """
    Executor which launches steps as Kubernetes Jobs.

    To use the `k8s_job_executor`, set it as the `executor_def` when defining a job:

    .. literalinclude:: ../../../../../../python_modules/libraries/dagster-k8s/dagster_k8s_tests/unit_tests/test_example_executor_mode_def.py
       :start-after: start_marker
       :end-before: end_marker
       :language: python

    Then you can configure the executor with run config as follows:

    .. code-block:: YAML

        execution:
          config:
            job_namespace: 'some-namespace'
            image_pull_policy: ...
            image_pull_secrets: ...
            service_account_name: ...
            env_config_maps: ...
            env_secrets: ...
            env_vars: ...
            job_image: ... # leave out if using userDeployments

    Configuration set on the Kubernetes Jobs and Pods created by the `K8sRunLauncher` will also be
    set on Kubernetes Jobs and Pods created by the `k8s_job_executor`.
    """

    run_launcher = init_context.instance.run_launcher
    if not isinstance(run_launcher, K8sRunLauncher):
        raise DagsterUnmetExecutorRequirementsError(
            "This engine is only compatible with a K8sRunLauncher; configure the "
            "K8sRunLauncher on your instance to use it.",
        )

    exc_cfg = init_context.executor_config

    k8s_container_context = K8sContainerContext(
        image_pull_policy=exc_cfg.get("image_pull_policy"),
        image_pull_secrets=exc_cfg.get("image_pull_secrets"),
        service_account_name=exc_cfg.get("service_account_name"),
        env_config_maps=exc_cfg.get("env_config_maps"),
        env_secrets=exc_cfg.get("env_secrets"),
        env_vars=exc_cfg.get("env_vars"),
        volume_mounts=exc_cfg.get("volume_mounts"),
        volumes=exc_cfg.get("volumes"),
        labels=exc_cfg.get("labels"),
        namespace=exc_cfg.get("job_namespace"),
    )

    return StepDelegatingExecutor(
        K8sStepHandler(
            image=exc_cfg.get("job_image"),
            container_context=k8s_container_context,
            load_incluster_config=run_launcher.load_incluster_config,
            kubeconfig_file=run_launcher.kubeconfig_file,
        ),
        retries=RetryMode.from_config(init_context.executor_config["retries"]),
        should_verify_step=True,
    )
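
A hedged sketch of per-run overrides for this executor, mirroring the ``exc_cfg.get(...)`` keys read above (values are illustrative):

run_config = {
    "execution": {
        "config": {
            "job_namespace": "some-namespace",
            "env_vars": ["MY_FLAG=1"],
            "labels": {"team": "data"},
        }
    }
}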