    def adopt_launched_task(self, kube_client, pod, pod_ids: dict):
        """
        Patch existing pod so that the current KubernetesJobWatcher can monitor it via label selectors

        :param kube_client: kubernetes client for speaking to kube API
        :param pod: V1Pod spec that we will patch with new label
        :param pod_ids: pod_ids we expect to patch.
        """
        self.log.info("attempting to adopt pod %s", pod.metadata.name)
        pod.metadata.labels['airflow-worker'] = str(self.scheduler_job_id)
        dag_id = pod.metadata.labels['dag_id']
        task_id = pod.metadata.labels['task_id']
        pod_id = create_pod_id(dag_id=dag_id, task_id=task_id)
        if pod_id not in pod_ids:
            self.log.error(
                "attempting to adopt task %s in dag %s which was not specified by database",
                task_id,
                dag_id,
            )
        else:
            try:
                kube_client.patch_namespaced_pod(
                    name=pod.metadata.name,
                    namespace=pod.metadata.namespace,
                    body=PodGenerator.serialize_pod(pod),
                )
                pod_ids.pop(pod_id)
            except ApiException as e:
                self.log.info("Failed to adopt pod %s. Reason: %s",
                              pod.metadata.name, e)
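
For context, a minimal sketch of the handshake this patch enables (the client setup and the id value are assumptions, not part of the snippet): once the 'airflow-worker' label is rewritten, the adopting scheduler's watcher finds the pod through an ordinary label selector.

from kubernetes import client, config

config.load_kube_config()  # assumes a local kubeconfig; in-cluster code would use load_incluster_config()
v1 = client.CoreV1Api()

# Hypothetical id of the adopting scheduler; adopt_launched_task wrote it into the label.
new_scheduler_job_id = "123"
pods = v1.list_namespaced_pod(
    namespace="airflow",
    label_selector=f"airflow-worker={new_scheduler_job_id}",
)
# Every pod patched by adopt_launched_task with this id now shows up in pods.items.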
Example #2
    def run_next(self, next_job: KubernetesJobType) -> None:
        """
        The run_next command will check the task_queue for any un-run jobs.
        It will then create a unique job-id, launch that job in the cluster,
        and store relevant info in the current_jobs map so we can track the job's
        status
        """
        self.log.info('Kubernetes job is %s', str(next_job).replace("\n", " "))
        key, command, kube_executor_config, pod_template_file = next_job
        dag_id, task_id, run_id, try_number, map_index = key

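        # Safety check: worker pods may only execute `airflow tasks run ...`.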
        if command[0:3] != ["airflow", "tasks", "run"]:
            raise ValueError(
                'The command must start with ["airflow", "tasks", "run"].')

        base_worker_pod = get_base_pod_from_template(pod_template_file,
                                                     self.kube_config)

        if not base_worker_pod:
            raise AirflowException(
                f"could not find a valid worker template yaml at {self.kube_config.pod_template_file}"
            )

        pod = PodGenerator.construct_pod(
            namespace=self.namespace,
            scheduler_job_id=self.scheduler_job_id,
            pod_id=create_pod_id(dag_id, task_id),
            dag_id=dag_id,
            task_id=task_id,
            kube_image=self.kube_config.kube_image,
            try_number=try_number,
            map_index=map_index,
            date=None,
            run_id=run_id,
            args=command,
            pod_override_object=kube_executor_config,
            base_worker_pod=base_worker_pod,
        )
        # construct_pod() above has already reconciled the operator's pod
        # override with the pod generated from the .cfg template file.
        self.log.debug("Kubernetes running for command %s", command)
        self.log.debug("Kubernetes launching image %s",
                       pod.spec.containers[0].image)

        # the watcher will monitor pods, so we do not block.
        self.run_pod_async(pod, **self.kube_config.kube_client_request_args)
        self.log.debug("Kubernetes Job created!")
    def try_adopt_task_instances(
            self, tis: List[TaskInstance]) -> List[TaskInstance]:
        # Tasks with no external_executor_id were never claimed by an executor
        # and must be rescheduled outright.
        tis_to_flush = [ti for ti in tis if not ti.external_executor_id]
        # Filter out tasks without an executor id so we never query the
        # cluster with a bogus 'airflow-worker=None' selector; a set also
        # avoids repeating the same list call per duplicate id.
        scheduler_job_ids = {ti.external_executor_id for ti in tis if ti.external_executor_id}
        pod_ids = {
            create_pod_id(dag_id=ti.dag_id, task_id=ti.task_id): ti
            for ti in tis if ti.external_executor_id
        }
        kube_client: client.CoreV1Api = self.kube_client
        for scheduler_job_id in scheduler_job_ids:
            # Find every worker pod still labelled with the dead scheduler's id.
            kwargs = {'label_selector': f'airflow-worker={scheduler_job_id}'}
            pod_list = kube_client.list_namespaced_pod(
                namespace=self.kube_config.kube_namespace, **kwargs)
            for pod in pod_list.items:
                self.adopt_launched_task(kube_client, pod, pod_ids)
        self._adopt_completed_pods(kube_client)
        # Anything left in pod_ids had no matching pod, so it could not be
        # adopted and is flushed back to the scheduler.
        tis_to_flush.extend(pod_ids.values())
        return tis_to_flush
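
A tiny pure-Python illustration of the bookkeeping shared by adopt_launched_task and try_adopt_task_instances (the key strings are placeholders; create_pod_id produces the real keys):

# adopt_launched_task pops each pod it successfully patches ...
pod_ids = {"dag-a-task-1": "ti_1", "dag-a-task-2": "ti_2"}
pod_ids.pop("dag-a-task-1")
# ... so whatever survives the loop was never adopted and is flushed back.
tis_to_flush = list(pod_ids.values())
assert tis_to_flush == ["ti_2"]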