def get_base_pod_from_template(pod_template_file: Optional[str], kube_config: Any) -> k8s.V1Pod:
    """
    Craft the "base pod" used by the KubernetesExecutor.

    Reads either the pod_template_file set in the executor_config or, when that
    is None, the base pod_template_file configured in airflow.cfg.

    :param pod_template_file: absolute path to a pod_template_file.yaml or None
    :param kube_config: The KubeConfig class generated by airflow that contains
        all kube metadata
    :return: a V1Pod that can be used as the base pod for k8s tasks
    """
    # Fall back to the airflow.cfg template when no per-task template is given.
    template_path = pod_template_file or kube_config.pod_template_file
    return PodGenerator.deserialize_model_file(template_path)
def test_construct_pod_empty_executor_config(self, mock_uuid):
    """With no executor_config override, construct_pod should yield the base
    worker pod plus only the standard per-task mutations."""
    template_path = sys.path[0] + '/tests/kubernetes/pod_generator_base_with_secrets.yaml'
    worker_config = PodGenerator.deserialize_model_file(template_path)
    mock_uuid.return_value = self.static_uuid

    result = PodGenerator.construct_pod(
        dag_id='dag_id',
        task_id='task_id',
        pod_id='pod_id',
        kube_image='test-image',
        try_number=3,
        date=self.execution_date,
        args=['command'],
        pod_override_object=None,  # empty executor_config
        base_worker_pod=worker_config,
        namespace='namespace',
        scheduler_job_id='uuid',
    )
    sanitized_result = self.k8s_client.sanitize_for_serialization(result)

    # Apply the mutations construct_pod is expected to make onto the base pod
    # and compare the two serialized forms.
    container = worker_config.spec.containers[0]
    container.image = "test-image"
    container.args = ["command"]
    container.env.append(k8s.V1EnvVar(name="AIRFLOW_IS_K8S_EXECUTOR_POD", value='True'))
    worker_config.metadata.annotations = self.annotations
    worker_config.metadata.labels = self.labels
    worker_config.metadata.labels['app'] = 'myapp'
    worker_config.metadata.name = 'pod_id-' + self.static_uuid.hex
    worker_config.metadata.namespace = 'namespace'

    expected = self.k8s_client.sanitize_for_serialization(worker_config)
    assert expected == sanitized_result
def test_ensure_max_label_length(self, mock_uuid):
    """Over-long identifiers must be truncated in labels/name (63-char k8s
    limit) while annotations retain the full, untruncated values."""
    mock_uuid.return_value = self.static_uuid
    template_path = os.path.join(os.path.dirname(__file__), 'pod_generator_base_with_secrets.yaml')
    worker_config = PodGenerator.deserialize_model_file(template_path)

    long_id = 'a' * 512  # far beyond the 63-char kubernetes label limit
    result = PodGenerator.construct_pod(
        dag_id=long_id,
        task_id=long_id,
        pod_id=long_id,
        kube_image=long_id,
        try_number=3,
        date=self.execution_date,
        args=['command'],
        namespace='namespace',
        scheduler_job_id=long_id,
        pod_override_object=None,
        base_worker_pod=worker_config,
    )

    # Pod name: truncated to 63 chars plus a uniquifying uuid suffix.
    assert result.metadata.name == 'a' * 63 + '.' + self.static_uuid.hex
    # Every label value must respect the kubernetes limit.
    assert all(len(value) <= 63 for value in result.metadata.labels.values())
    # Annotations are not length-limited, so the originals survive intact.
    assert result.metadata.annotations['dag_id'] == long_id
    assert result.metadata.annotations['task_id'] == long_id
def generate_pod_yaml(args):
    """Generates yaml files for each task in the DAG. Used for testing output of KubernetesExecutor.

    :param args: CLI namespace providing ``execution_date``, ``subdir``,
        ``dag_id`` and ``output_path``.
    """
    execution_date = args.execution_date
    dag = get_dag(subdir=args.subdir, dag_id=args.dag_id)
    yaml_output_path = args.output_path
    kube_config = KubeConfig()
    # Loop-invariant work hoisted out of the per-task loop: the API client,
    # the date label, and the output directory are identical for every task.
    api_client = ApiClient()
    date_string = pod_generator.datetime_to_label_safe_datestring(execution_date)
    os.makedirs(os.path.dirname(yaml_output_path + "/airflow_yaml_output/"), exist_ok=True)
    for task in dag.tasks:
        ti = TaskInstance(task, execution_date)
        pod = PodGenerator.construct_pod(
            dag_id=args.dag_id,
            task_id=ti.task_id,
            pod_id=create_pod_id(args.dag_id, ti.task_id),
            try_number=ti.try_number,
            kube_image=kube_config.kube_image,
            date=ti.execution_date,
            args=ti.command_as_list(),
            pod_override_object=PodGenerator.from_obj(ti.executor_config),
            scheduler_job_id="worker-config",
            namespace=kube_config.executor_namespace,
            base_worker_pod=PodGenerator.deserialize_model_file(kube_config.pod_template_file),
        )
        # Give cluster policies a chance to mutate the pod before serializing.
        pod_mutation_hook(pod)
        yaml_file_name = f"{args.dag_id}_{ti.task_id}_{date_string}.yml"
        with open(yaml_output_path + "/airflow_yaml_output/" + yaml_file_name, "w") as output:
            sanitized_pod = api_client.sanitize_for_serialization(pod)
            output.write(yaml.dump(sanitized_pod))
    print(f"YAML output can be found at {yaml_output_path}/airflow_yaml_output/")
def test_pod_template_file(self):
    """as_pod() should build the pod described by the configured pod_template_file."""
    fixture = sys.path[0] + '/tests/kubernetes/pod.yaml'
    self.kube_config.pod_template_file = fixture
    result = WorkerConfiguration(self.kube_config).as_pod()
    expected = PodGenerator.deserialize_model_file(fixture)
    # The pod name is generated dynamically, so exclude it from the comparison.
    expected.metadata.name = ANY
    self.assertEqual(expected, result)
def test_construct_pod(self, mock_uuid):
    """construct_pod should layer the executor_config override (resource
    limits here) on top of the base worker pod template."""
    template_path = sys.path[0] + '/tests/kubernetes/pod_generator_base_with_secrets.yaml'
    worker_config = PodGenerator.deserialize_model_file(template_path)
    mock_uuid.return_value = self.static_uuid

    # Per-task override carrying only resource limits.
    override = k8s.V1Pod(
        spec=k8s.V1PodSpec(
            containers=[
                k8s.V1Container(
                    name='',
                    resources=k8s.V1ResourceRequirements(limits={'cpu': '1m', 'memory': '1G'}),
                )
            ]
        )
    )

    result = PodGenerator.construct_pod(
        dag_id=self.dag_id,
        task_id=self.task_id,
        pod_id='pod_id',
        kube_image='airflow_image',
        try_number=self.try_number,
        date=self.execution_date,
        args=['command'],
        pod_override_object=override,
        base_worker_pod=worker_config,
        namespace='test_namespace',
        scheduler_job_id='uuid',
    )

    # Mutate the shared expected fixture to the post-construct_pod shape.
    expected = self.expected
    expected.metadata.labels = self.labels
    expected.metadata.labels['app'] = 'myapp'
    expected.metadata.annotations = self.annotations
    expected.metadata.name = 'pod_id.' + self.static_uuid.hex
    expected.metadata.namespace = 'test_namespace'
    expected_container = expected.spec.containers[0]
    expected_container.args = ['command']
    expected_container.image = 'airflow_image'
    expected_container.resources = {'limits': {'cpu': '1m', 'memory': '1G'}}
    expected_container.env.append(k8s.V1EnvVar(name="AIRFLOW_IS_K8S_EXECUTOR_POD", value='True'))

    result_dict = self.k8s_client.sanitize_for_serialization(result)
    expected_dict = self.k8s_client.sanitize_for_serialization(self.expected)
    assert expected_dict == result_dict
def run_next(self, next_job: KubernetesJobType) -> None:
    """
    The run_next command will check the task_queue for any un-run jobs.
    It will then create a unique job-id, launch that job in the cluster,
    and store relevant info in the current_jobs map so we can track the job's
    status.

    :param next_job: tuple of (task key, airflow CLI command list, executor_config override)
    :raises ValueError: if the command is not an ``airflow tasks run`` invocation
    :raises AirflowException: if the configured pod template file cannot be loaded
    """
    self.log.info('Kubernetes job is %s', str(next_job))
    key, command, kube_executor_config = next_job
    dag_id, task_id, execution_date, try_number = key
    if command[0:3] != ["airflow", "tasks", "run"]:
        raise ValueError('The command must start with ["airflow", "tasks", "run"].')
    base_worker_pod = PodGenerator.deserialize_model_file(self.kube_config.pod_template_file)
    if not base_worker_pod:
        raise AirflowException(
            f"could not find a valid worker template yaml at {self.kube_config.pod_template_file}"
        )
    pod = PodGenerator.construct_pod(
        namespace=self.namespace,
        scheduler_job_id=self.scheduler_job_id,
        pod_id=create_pod_id(dag_id, task_id),
        dag_id=dag_id,
        task_id=task_id,
        kube_image=self.kube_config.kube_image,
        try_number=try_number,
        date=execution_date,
        # BUG FIX: construct_pod takes the command list via the ``args``
        # keyword (as every other call site does), not ``command``.
        args=command,
        pod_override_object=kube_executor_config,
        base_worker_pod=base_worker_pod,
    )
    # Reconcile the pod generated by the Operator and the Pod
    # generated by the .cfg file
    self.log.debug("Kubernetes running for command %s", command)
    self.log.debug("Kubernetes launching image %s", pod.spec.containers[0].image)
    # the watcher will monitor pods, so we do not block.
    self.launcher.run_pod_async(pod, **self.kube_config.kube_client_request_args)
    self.log.debug("Kubernetes Job created!")
def test_deserialize_model_string(self):
    """deserialize_model_file should also accept a raw YAML string (not just
    a file path) and produce the expected serialized pod."""
    # NOTE(review): indentation of this YAML fixture was reconstructed;
    # the semantic structure (container-level resources/command/args) is
    # what the test asserts against self.deserialize_result.
    fixture = """
apiVersion: v1
kind: Pod
metadata:
  name: memory-demo
  namespace: mem-example
spec:
  containers:
    - name: memory-demo-ctr
      image: apache/airflow:stress-2020.07.10-1.0.4
      resources:
        limits:
          memory: "200Mi"
        requests:
          memory: "100Mi"
      command: ["stress"]
      args: ["--vm", "1", "--vm-bytes", "150M", "--vm-hang", "1"]
"""
    result = PodGenerator.deserialize_model_file(fixture)
    sanitized_res = self.k8s_client.sanitize_for_serialization(result)
    assert sanitized_res == self.deserialize_result
def test_deserialize_model_file(self):
    """deserialize_model_file should load a V1Pod from a yaml file on disk."""
    fixture_path = sys.path[0] + '/tests/kubernetes/pod.yaml'
    pod = PodGenerator.deserialize_model_file(fixture_path)
    serialized = self.k8s_client.sanitize_for_serialization(pod)
    assert serialized == self.deserialize_result
def test_deserialize_model_file(self):
    """deserialize_model_file should load a V1Pod from a yaml file path
    relative to the working directory."""
    pod = PodGenerator.deserialize_model_file('tests/kubernetes/pod.yaml')
    serialized = self.k8s_client.sanitize_for_serialization(pod)
    self.assertEqual(serialized, self.deserialize_result)