def _exec_test(fpath):
    """Load a kubernetes resource yaml file and execute it as a test job.

    Every resource document in the file that lacks ``metadata.name`` gets a
    generated name derived from the file's basename (non-alphanumeric chars
    replaced with "-"); the first unnamed resource receives the bare name,
    subsequent ones get a numeric suffix.

    Arguments:
        fpath {str} -- Path (possibly relative to the caller) to a yaml file
            holding one or more kubernetes resource documents.
    """
    fpath = resolve_relative_path(fpath)
    logging.info(style.CYAN(f"Testing {fpath}:"))

    with open(fpath, "r") as raw:
        # safe_load_all yields None for empty documents (e.g. a trailing
        # "---"); drop them so the .get() calls below cannot fail on None.
        body = [doc for doc in yaml.safe_load_all(raw.read()) if doc is not None]

    test_name = re.sub(r"[^a-zA-Z0-9]", "-",
                       os.path.splitext(os.path.basename(fpath))[0])

    idx = 0
    for resource in body:
        metadata = resource.setdefault("metadata", {})
        if metadata.get("name") is None:
            metadata["name"] = test_name if idx == 0 else f"{test_name}-{idx}"
            idx += 1

    runner = JobRunner(
        body,
        auto_load_kube_config=True,
        logger=logging,
        delete_policy=JobRunnerDeletePolicy.Always)  # type:ignore
    runner.execute_job()
# --- Example 2 ---
    def __init__(
        self,
        task_id: str,
        command: List[str] = None,
        arguments: List[str] = None,
        image: str = None,
        namespace: str = None,
        envs: dict = None,
        body: Union[str, dict, List[dict]] = None,
        body_filepath: str = None,
        image_pull_policy: str = None,
        delete_policy: Union[str, JobRunnerDeletePolicy] = DEFAULT_DELETE_POLICY,
        in_cluster: bool = None,
        config_file: str = None,
        get_logs: bool = True,
        cluster_context: str = None,
        startup_timeout_seconds: float = DEFAULT_TASK_STARTUP_TIMEOUT,
        validate_body_on_init: bool = DEFAULT_VALIDATE_BODY_ON_INIT,
        enable_jinja: bool = True,
        jinja_job_args: dict = None,
        **kwargs,
    ):
        """An operator that executes an airflow task as a kubernetes Job.
        See: https://kubernetes.io/docs/concepts/workloads/controllers/jobs-run-to-completion/
        for notes about a kubernetes job.

        Keyword Arguments:

            command {List[str]} -- The pod main container command (default: None)
            arguments {List[str]} -- the pod main container arguments. (default: None)
            image {str} -- The image to use in the pod. (default: None)
            namespace {str} -- The namespace to execute in. (default: None)
            envs {dict} -- A collection of environment variables that will be added to all
                containers.
            body {dict|string} -- The job to execute as a yaml description. (default: None)
                If None, will use a default job yaml command. In this case you must provide an
                image.
            body_filepath {str} -- The path to the file to read the yaml from, overridden by
                body. (default: None)
            delete_policy {str} -- Any of: Never, Always, IfSucceeded (default: {"IfSucceeded"})
            in_cluster {bool} -- True if running inside a cluster (on a pod) (default: {False})
            config_file {str} -- The kubernetes configuration file to load, if
                None use default config. (default: {None})
            cluster_context {str} -- The context to run in, if None, use current context
                (default: {None})
            validate_body_on_init {bool} -- If true, validates the yaml in the constructor,
                setting this to True, will slow dag creation.
                (default: {from env/airflow config: AIRFLOW__KUBE_JOB_OPERATOR__validate_body_on_init or False})
            enable_jinja {bool} -- If true, the following fields will be parsed as jinja2,
                        command, arguments, image, envs, body, namespace, config_file, cluster_context
            jinja_job_args {dict} -- A dictionary or object to be used in the jinja template to render
                arguments. The jinja args are loaded under the keyword "job".

        Auto completed yaml values (if missing):
            All:
                metadata.namespace = current namespace
            Pod:
                spec.restartPolicy = Never
            Job:
                spec.backOffLimit = 0
                spec.template.spec.restartPolicy = Never
                metadata.finalizers - [foregroundDeletion]

        """
        super().__init__(task_id=task_id, **kwargs)

        # NOTE(review): validation now raises explicitly instead of using
        # `assert cond, ValueError(msg)`. In that form the ValueError instance
        # was only the assert *message* (an AssertionError was raised, and
        # nothing at all under `python -O`); raising the ValueError directly
        # matches the obvious intent.
        if body_filepath is None and body is None and image is None:
            raise ValueError(
                "body is None, body_filepath is None and an image was not defined. Unknown image to execute."
            )

        # Resolve the body: explicit body wins, then the file path, then the
        # packaged default execution object.
        body = body or self._read_body_from_file(
            resolve_relative_path(
                body_filepath or DEFAULT_EXECUTION_OBJECT_PATHS[DEFAULT_EXECTION_OBJECT],
                self.resolve_relative_path_callstack_offset + 1,
            )
        )

        if body is None or not isinstance(body, (dict, str)):
            raise ValueError("body must either be a yaml string or a dict")

        # Accept the delete policy as a string; an unknown string becomes None
        # and fails the check below.
        if isinstance(delete_policy, str):
            try:
                delete_policy = JobRunnerDeletePolicy(delete_policy)
            except Exception:
                delete_policy = None

        if delete_policy is None or not isinstance(delete_policy, JobRunnerDeletePolicy):
            raise ValueError(
                f"Invalid delete policy. Valid values are ({JobRunnerDeletePolicy.__module__}.JobRunnerDeletePolicy):"
                + f" {[str(v) for v in JobRunnerDeletePolicy]}"
            )

        if envs is not None and not isinstance(envs, dict):
            raise ValueError("The env collection must be a dict or None")
        if image is not None and not isinstance(image, str):
            raise ValueError("image must be a string or None")

        # Job properties.
        self._job_is_executing = False
        self._job_runner: JobRunner = None  # created lazily at execution time

        # override/replace properties
        self.command = command
        self.arguments = arguments
        self.image = image
        self.envs = envs
        self.image_pull_policy = image_pull_policy
        self.body = body
        self.namespace = namespace
        self.get_logs = get_logs
        self.delete_policy = delete_policy

        # kubernetes config properties.
        self.config_file = config_file
        self.cluster_context = cluster_context
        self.in_cluster = in_cluster

        # operation properties
        self.startup_timeout_seconds = startup_timeout_seconds

        # Jinja
        self.jinja_job_args = jinja_job_args
        if enable_jinja:
            # Airflow renders these attributes through jinja before execute().
            self.template_fields = [
                "command",
                "arguments",
                "image",
                "envs",
                "body",
                "namespace",
                "config_file",
                "cluster_context",
            ]

        # Used for debugging
        self._internal_wait_kuberentes_object_timeout = None

        if validate_body_on_init:
            if enable_jinja and not isinstance(body, dict):
                raise ValueError(
                    "Cannot set validate_body_on_init=True, if input body is string. "
                    + "Jinja context only exists when the task is executed."
                )
            self.prepare_and_update_body()
# --- Example 3 ---
    def __init__(
        self,
        namespace: str = None,
        image: str = None,
        cmds: List[str] = None,
        arguments: List[str] = None,
        ports: list = None,
        volume_mounts: List[VolumeMount] = None,
        volumes: List[Volume] = None,
        env_vars: dict = None,
        secrets: List[Secret] = None,
        in_cluster: bool = None,
        cluster_context: str = None,
        labels: dict = None,
        startup_timeout_seconds: float = 120,
        get_logs: bool = True,
        image_pull_policy: str = "IfNotPresent",
        annotations: dict = None,
        resources=None,
        affinity: dict = None,
        config_file: str = None,
        node_selectors: dict = None,
        image_pull_secrets: str = None,
        service_account_name: str = "default",
        is_delete_operator_pod: bool = False,
        hostnetwork: bool = False,
        tolerations: List[dict] = None,
        configmaps: List[str] = None,
        security_context: dict = None,
        pod_runtime_info_envs: dict = None,
        dnspolicy: str = None,
        # new args.
        init_containers: Optional[List[k8s.V1Container]] = None,
        env_from: List[str] = None,
        schedulername: str = None,
        priority_class_name: str = None,
        # job operator args
        body: str = None,
        body_filepath: str = None,
        delete_policy: Union[str, JobRunnerDeletePolicy] = None,
        validate_body_on_init: bool = DEFAULT_VALIDATE_BODY_ON_INIT,
        enable_jinja: bool = True,
        jinja_job_args: dict = None,
        *args,
        **kwargs,
    ):
        """
        An operator that executes an airflow task as a kubernetes Job.
        See: https://kubernetes.io/docs/concepts/workloads/controllers/jobs-run-to-completion/
            for notes about kubernetes jobs.

        NOTE: This is a legacy operator that allows for similar arguments
        as the KubernetesPodOperator. Please use the KubernetesJobOperator instead.

        NOTE: xcom has not been implemented.

        :param image: Docker image you wish to launch. Defaults to dockerhub.io,
            but fully qualified URLS will point to custom repositories
        :type image: str
        :param namespace: the namespace to run within kubernetes
        :type namespace: str
        :param cmds: entrypoint of the container. (templated)
            The docker images's entrypoint is used if this is not provided.
        :type cmds: list[str]
        :param arguments: arguments of the entrypoint. (templated)
            The docker image's CMD is used if this is not provided.
        :type arguments: list[str]
        :param image_pull_policy: Specify a policy to cache or always pull an image
        :type image_pull_policy: str
        :param image_pull_secrets: Any image pull secrets to be given to the pod.
                                If more than one secret is required, provide a
                                comma separated list: secret_a,secret_b
        :type image_pull_secrets: str
        :param ports: ports for launched pod
        :type ports: list
        :param volume_mounts: volumeMounts for launched pod
        :type volume_mounts: list[airflow.contrib.kubernetes.volume_mount.VolumeMount]
        :param volumes: volumes for launched pod. Includes ConfigMaps and PersistentVolumes
        :type volumes: list[airflow.contrib.kubernetes.volume.Volume]
        :param labels: labels to apply to the Pod
        :type labels: dict
        :param startup_timeout_seconds: timeout in seconds to startup the pod
        :type startup_timeout_seconds: int
        :param name: name of the task you want to run,
            will be used to generate a pod id
        :type name: str
        :param env_vars: Environment variables initialized in the container. (templated)
        :type env_vars: dict
        :param secrets: Kubernetes secrets to inject in the container,
            They can be exposed as environment vars or files in a volume.
        :type secrets: list[airflow.contrib.kubernetes.secret.Secret]
        :param in_cluster: run kubernetes client with in_cluster configuration (if None autodetect)
        :type in_cluster: bool
        :param cluster_context: context that points to kubernetes cluster.
            Ignored when in_cluster is True. If None, current-context is used.
        :type cluster_context: str
        :param get_logs: get the stdout of the container as logs of the tasks
        :type get_logs: bool
        :param annotations: non-identifying metadata you can attach to the Pod.
                            Can be a large range of data, and can include characters
                            that are not permitted by labels.
        :type annotations: dict
        :param resources: A dict containing a group of resources requests and limits
        :type resources: dict
        :param affinity: A dict containing a group of affinity scheduling rules
        :type affinity: dict
        :param node_selectors: A dict containing a group of scheduling rules
        :type node_selectors: dict
        :param config_file: The path to the Kubernetes config file
        :type config_file: str
        :param is_delete_operator_pod: What to do when the pod reaches its final
            state, or the execution is interrupted.
            If False (default): do nothing, If True: delete the pod if succeeded
        :type is_delete_operator_pod: bool
        :param hostnetwork: If True enable host networking on the pod
        :type hostnetwork: bool
        :param tolerations: A list of kubernetes tolerations
        :type tolerations: list tolerations
        :param configmaps: A list of configmap names objects that we
            want mount as env variables
        :type configmaps: list[str]
        :param pod_runtime_info_envs: environment variables about
                                    pod runtime information (ip, namespace, nodeName, podName)
        :type pod_runtime_info_envs: list[PodRuntimeEnv]
        :param dnspolicy: Specify a dnspolicy for the pod
        :type dnspolicy: str

        Added arguments:

            body {dict|string} -- The job to execute as a yaml description. (default: None)
            body_filepath {str} -- The path to the file to read the yaml from, overridden by
                body. (default: None)
            delete_policy {str} -- Any of: Never, Always, IfSucceeded (default: {"IfSucceeded"});
                overrides is_delete_operator_pod.
            validate_body_on_init {bool} -- If true, validates the yaml in the constructor,
                setting this to True, will slow dag creation.
                (default: {from env/airflow config: AIRFLOW__KUBE_JOB_OPERATOR__validate_body_on_init or False})
            jinja_job_args {dict} -- A dictionary or object to be used in the jinja template to render
                arguments. The jinja args are loaded under the keyword "job".
        """
        # BUGFIX: the original expression
        #   delete_policy or IfSucceeded if is_delete_operator_pod else Never
        # parsed as `(delete_policy or IfSucceeded) if is_delete_operator_pod
        # else Never`, which silently discarded an explicit delete_policy
        # whenever is_delete_operator_pod was False. Per the docstring,
        # delete_policy must override is_delete_operator_pod, so only derive
        # a default when no policy was given.
        if delete_policy is None:
            delete_policy = (
                JobRunnerDeletePolicy.IfSucceeded
                if is_delete_operator_pod
                else JobRunnerDeletePolicy.Never
            )

        if body_filepath is not None:
            # Offset 2: resolve relative to this constructor's caller.
            body_filepath = resolve_relative_path(body_filepath, 2)

        super().__init__(
            command=cmds or [],
            arguments=arguments or [],
            envs=env_vars or {},
            image=image,
            namespace=namespace,
            body=body,
            body_filepath=body_filepath,
            delete_policy=delete_policy,
            in_cluster=in_cluster,
            config_file=config_file,
            cluster_context=cluster_context,
            validate_body_on_init=validate_body_on_init,
            startup_timeout_seconds=startup_timeout_seconds,
            get_logs=get_logs,
            enable_jinja=enable_jinja,
            image_pull_policy=image_pull_policy,
            jinja_job_args=jinja_job_args,
            *args,
            **kwargs,
        )

        # adding self properties.
        self.labels = labels or {}
        self.ports = ports or []
        self.volume_mounts = volume_mounts or []
        self.volumes = volumes or []
        self.secrets = secrets or []
        self.node_selectors = node_selectors or {}
        self.annotations = annotations or {}
        self.affinity = affinity or {}
        self.resources = self._set_resources(resources)
        self.image_pull_secrets = image_pull_secrets
        self.service_account_name = service_account_name
        self.hostnetwork = hostnetwork
        self.tolerations = tolerations or []
        self.configmaps = configmaps or []
        self.security_context = security_context or {}
        self.pod_runtime_info_envs = pod_runtime_info_envs or []
        self.dnspolicy = dnspolicy

        self.init_containers = init_containers
        self.env_from = env_from
        self.schedulername = schedulername
        self.priority_class_name = priority_class_name