Пример #1
0
    def run_next(self, next_job: KubernetesJobType) -> None:
        """
        The run_next command will check the task_queue for any un-run jobs.
        It will then create a unique job-id, launch that job in the cluster,
        and store relevant info in the current_jobs map so we can track the job's
        status
        """
        self.log.info('Kubernetes job is %s', str(next_job))
        key, command, kube_executor_config = next_job
        dag_id, task_id, execution_date, try_number = key

        if isinstance(command, str):
            command = [command]

        pod = PodGenerator.construct_pod(
            namespace=self.namespace,
            worker_uuid=self.worker_uuid,
            pod_id=self._create_pod_id(dag_id, task_id),
            dag_id=pod_generator.make_safe_label_value(dag_id),
            task_id=pod_generator.make_safe_label_value(task_id),
            try_number=try_number,
            date=self._datetime_to_label_safe_datestring(execution_date),
            command=command,
            kube_executor_config=kube_executor_config,
            worker_config=self.worker_configuration_pod)
        # Reconcile the pod generated by the Operator and the Pod
        # generated by the .cfg file
        self.log.debug("Kubernetes running for command %s", command)
        self.log.debug("Kubernetes launching image %s",
                       pod.spec.containers[0].image)

        # the watcher will monitor pods, so we do not block.
        self.launcher.run_pod_async(
            pod, **self.kube_config.kube_client_request_args)
        self.log.debug("Kubernetes Job created!")
    def test_construct_pod_empty_executor_config(self, mock_uuid):
        path = sys.path[0] + '/tests/kubernetes/pod_generator_base_with_secrets.yaml'
        worker_config = PodGenerator.deserialize_model_file(path)
        mock_uuid.return_value = self.static_uuid
        executor_config = None

        result = PodGenerator.construct_pod(
            dag_id='dag_id',
            task_id='task_id',
            pod_id='pod_id',
            kube_image='test-image',
            try_number=3,
            date=self.execution_date,
            args=['command'],
            pod_override_object=executor_config,
            base_worker_pod=worker_config,
            namespace='namespace',
            scheduler_job_id='uuid',
        )
        sanitized_result = self.k8s_client.sanitize_for_serialization(result)
        worker_config.spec.containers[0].image = "test-image"
        worker_config.spec.containers[0].args = ["command"]
        worker_config.metadata.annotations = self.annotations
        worker_config.metadata.labels = self.labels
        worker_config.metadata.labels['app'] = 'myapp'
        worker_config.metadata.name = 'pod_id-' + self.static_uuid.hex
        worker_config.metadata.namespace = 'namespace'
        worker_config.spec.containers[0].env.append(
            k8s.V1EnvVar(name="AIRFLOW_IS_K8S_EXECUTOR_POD", value='True')
        )
        worker_config_result = self.k8s_client.sanitize_for_serialization(worker_config)
        assert worker_config_result == sanitized_result
Пример #3
0
    def test_construct_pod_empty_execuctor_config(self, mock_uuid):
        mock_uuid.return_value = self.static_uuid
        worker_config = k8s.V1Pod(
            spec=k8s.V1PodSpec(
                containers=[
                    k8s.V1Container(
                        name='',
                        resources=k8s.V1ResourceRequirements(
                            limits={
                                'cpu': '1m',
                                'memory': '1G'
                            }
                        )
                    )
                ]
            )
        )
        executor_config = None

        result = PodGenerator.construct_pod(
            'dag_id',
            'task_id',
            'pod_id',
            3,
            'date',
            ['command'],
            executor_config,
            worker_config,
            'namespace',
            'uuid',
        )
        sanitized_result = self.k8s_client.sanitize_for_serialization(result)

        self.assertEqual({
            'apiVersion': 'v1',
            'kind': 'Pod',
            'metadata': self.metadata,
            'spec': {
                'containers': [{
                    'args': [],
                    'command': ['command'],
                    'env': [],
                    'envFrom': [],
                    'imagePullPolicy': 'IfNotPresent',
                    'name': 'base',
                    'ports': [],
                    'resources': {
                        'limits': {
                            'cpu': '1m',
                            'memory': '1G'
                        }
                    },
                    'volumeMounts': []
                }],
                'hostNetwork': False,
                'imagePullSecrets': [],
                'restartPolicy': 'Never',
                'volumes': []
            }
        }, sanitized_result)
Пример #4
0
    def test_ensure_max_label_length(self, mock_uuid):
        mock_uuid.return_value = self.static_uuid
        path = os.path.join(os.path.dirname(__file__),
                            'pod_generator_base_with_secrets.yaml')
        worker_config = PodGenerator.deserialize_model_file(path)

        result = PodGenerator.construct_pod(
            dag_id='a' * 512,
            task_id='a' * 512,
            pod_id='a' * 512,
            kube_image='a' * 512,
            try_number=3,
            date=self.execution_date,
            args=['command'],
            namespace='namespace',
            scheduler_job_id='a' * 512,
            pod_override_object=None,
            base_worker_pod=worker_config,
        )

        assert result.metadata.name == 'a' * 63 + '.' + self.static_uuid.hex
        for _, v in result.metadata.labels.items():
            assert len(v) <= 63

        assert 'a' * 512 == result.metadata.annotations['dag_id']
        assert 'a' * 512 == result.metadata.annotations['task_id']
Пример #5
0
    def test_make_pod_assert_labels(self):
        # Tests the pod created has all the expected labels set
        self.kube_config.dags_folder = 'dags'

        worker_config = WorkerConfiguration(self.kube_config)
        execution_date = parser.parse('2019-11-21 11:08:22.920875')
        pod = PodGenerator.construct_pod(
            "test_dag_id",
            "test_task_id",
            "test_pod_id",
            1,
            execution_date,
            ["bash -c 'ls /'"],
            None,
            worker_config.as_pod(),
            "default",
            "sample-uuid",
        )
        expected_labels = {
            'airflow-worker': 'sample-uuid',
            'airflow_version': airflow_version.replace('+', '-'),
            'dag_id': 'test_dag_id',
            'execution_date': datetime_to_label_safe_datestring(execution_date),
            'kubernetes_executor': 'True',
            'my_label': 'label_id',
            'task_id': 'test_task_id',
            'try_number': '1'
        }
        self.assertEqual(pod.metadata.labels, expected_labels)
Пример #6
0
def generate_pod_yaml(args):
    """Generates yaml files for each task in the DAG. Used for testing output of KubernetesExecutor"""
    execution_date = args.execution_date
    dag = get_dag(subdir=args.subdir, dag_id=args.dag_id)
    yaml_output_path = args.output_path
    kube_config = KubeConfig()
    for task in dag.tasks:
        ti = TaskInstance(task, execution_date)
        pod = PodGenerator.construct_pod(
            dag_id=args.dag_id,
            task_id=ti.task_id,
            pod_id=create_pod_id(args.dag_id, ti.task_id),
            try_number=ti.try_number,
            kube_image=kube_config.kube_image,
            date=ti.execution_date,
            args=ti.command_as_list(),
            pod_override_object=PodGenerator.from_obj(ti.executor_config),
            scheduler_job_id="worker-config",
            namespace=kube_config.executor_namespace,
            base_worker_pod=PodGenerator.deserialize_model_file(kube_config.pod_template_file),
        )
        pod_mutation_hook(pod)
        api_client = ApiClient()
        date_string = pod_generator.datetime_to_label_safe_datestring(execution_date)
        yaml_file_name = f"{args.dag_id}_{ti.task_id}_{date_string}.yml"
        os.makedirs(os.path.dirname(yaml_output_path + "/airflow_yaml_output/"), exist_ok=True)
        with open(yaml_output_path + "/airflow_yaml_output/" + yaml_file_name, "w") as output:
            sanitized_pod = api_client.sanitize_for_serialization(pod)
            output.write(yaml.dump(sanitized_pod))
    print(f"YAML output can be found at {yaml_output_path}/airflow_yaml_output/")
Пример #7
0
    def run_next(self, next_job: KubernetesJobType) -> None:
        """
        The run_next command will check the task_queue for any un-run jobs.
        It will then create a unique job-id, launch that job in the cluster,
        and store relevant info in the current_jobs map so we can track the job's
        status
        """
        self.log.info('Kubernetes job is %s', str(next_job))
        key, command, kube_executor_config = next_job
        dag_id, task_id, execution_date, try_number = key

        if command[0:3] != ["airflow", "tasks", "run"]:
            raise ValueError('The command must start with ["airflow", "tasks", "run"].')

        pod = PodGenerator.construct_pod(
            namespace=self.namespace,
            worker_uuid=self.worker_uuid,
            pod_id=self._create_pod_id(dag_id, task_id),
            dag_id=dag_id,
            task_id=task_id,
            try_number=try_number,
            date=execution_date,
            command=command,
            kube_executor_config=kube_executor_config,
            worker_config=self.worker_configuration_pod
        )
        # Reconcile the pod generated by the Operator and the Pod
        # generated by the .cfg file
        self.log.debug("Kubernetes running for command %s", command)
        self.log.debug("Kubernetes launching image %s", pod.spec.containers[0].image)

        # the watcher will monitor pods, so we do not block.
        self.launcher.run_pod_async(pod, **self.kube_config.kube_client_request_args)
        self.log.debug("Kubernetes Job created!")
Пример #8
0
    def test_construct_pod(self, mock_uuid):
        path = sys.path[
            0] + '/tests/kubernetes/pod_generator_base_with_secrets.yaml'
        worker_config = PodGenerator.deserialize_model_file(path)
        mock_uuid.return_value = self.static_uuid
        executor_config = k8s.V1Pod(spec=k8s.V1PodSpec(containers=[
            k8s.V1Container(name='',
                            resources=k8s.V1ResourceRequirements(limits={
                                'cpu': '1m',
                                'memory': '1G'
                            }))
        ]))

        result = PodGenerator.construct_pod(
            dag_id=self.dag_id,
            task_id=self.task_id,
            pod_id='pod_id',
            kube_image='airflow_image',
            try_number=self.try_number,
            date=self.execution_date,
            args=['command'],
            pod_override_object=executor_config,
            base_worker_pod=worker_config,
            namespace='test_namespace',
            scheduler_job_id='uuid',
        )
        expected = self.expected
        expected.metadata.labels = self.labels
        expected.metadata.labels['app'] = 'myapp'
        expected.metadata.annotations = self.annotations
        expected.metadata.name = 'pod_id.' + self.static_uuid.hex
        expected.metadata.namespace = 'test_namespace'
        expected.spec.containers[0].args = ['command']
        expected.spec.containers[0].image = 'airflow_image'
        expected.spec.containers[0].resources = {
            'limits': {
                'cpu': '1m',
                'memory': '1G'
            }
        }
        expected.spec.containers[0].env.append(
            k8s.V1EnvVar(
                name="AIRFLOW_IS_K8S_EXECUTOR_POD",
                value='True',
            ))
        result_dict = self.k8s_client.sanitize_for_serialization(result)
        expected_dict = self.k8s_client.sanitize_for_serialization(
            self.expected)

        assert expected_dict == result_dict
Пример #9
0
    def run_next(self, next_job: KubernetesJobType) -> None:
        """
        The run_next command will check the task_queue for any un-run jobs.
        It will then create a unique job-id, launch that job in the cluster,
        and store relevant info in the current_jobs map so we can track the job's
        status
        """
        self.log.info('Kubernetes job is %s', str(next_job).replace("\n", " "))
        key, command, kube_executor_config, pod_template_file = next_job
        dag_id, task_id, run_id, try_number, map_index = key

        if command[0:3] != ["airflow", "tasks", "run"]:
            raise ValueError(
                'The command must start with ["airflow", "tasks", "run"].')

        base_worker_pod = get_base_pod_from_template(pod_template_file,
                                                     self.kube_config)

        if not base_worker_pod:
            raise AirflowException(
                f"could not find a valid worker template yaml at {self.kube_config.pod_template_file}"
            )

        pod = PodGenerator.construct_pod(
            namespace=self.namespace,
            scheduler_job_id=self.scheduler_job_id,
            pod_id=create_pod_id(dag_id, task_id),
            dag_id=dag_id,
            task_id=task_id,
            kube_image=self.kube_config.kube_image,
            try_number=try_number,
            map_index=map_index,
            date=None,
            run_id=run_id,
            args=command,
            pod_override_object=kube_executor_config,
            base_worker_pod=base_worker_pod,
        )
        # Reconcile the pod generated by the Operator and the Pod
        # generated by the .cfg file
        self.log.debug("Kubernetes running for command %s", command)
        self.log.debug("Kubernetes launching image %s",
                       pod.spec.containers[0].image)

        # the watcher will monitor pods, so we do not block.
        self.run_pod_async(pod, **self.kube_config.kube_client_request_args)
        self.log.debug("Kubernetes Job created!")
Пример #10
0
def generate_pod_yaml(args):
    """Generates yaml files for each task in the DAG. Used for testing output of KubernetesExecutor"""

    from kubernetes.client.api_client import ApiClient

    from airflow.executors.kubernetes_executor import AirflowKubernetesScheduler, KubeConfig
    from airflow.kubernetes import pod_generator
    from airflow.kubernetes.pod_generator import PodGenerator
    from airflow.kubernetes.worker_configuration import WorkerConfiguration
    from airflow.settings import pod_mutation_hook

    execution_date = args.execution_date
    dag = get_dag(subdir=args.subdir, dag_id=args.dag_id)
    yaml_output_path = args.output_path
    kube_config = KubeConfig()
    for task in dag.tasks:
        ti = TaskInstance(task, execution_date)
        pod = PodGenerator.construct_pod(
            dag_id=args.dag_id,
            task_id=ti.task_id,
            pod_id=AirflowKubernetesScheduler._create_pod_id(  # pylint: disable=W0212
                args.dag_id, ti.task_id),
            try_number=ti.try_number,
            kube_image=kube_config.kube_image,
            date=ti.execution_date,
            command=ti.command_as_list(),
            pod_override_object=PodGenerator.from_obj(ti.executor_config),
            worker_uuid="worker-config",
            namespace=kube_config.executor_namespace,
            base_worker_pod=WorkerConfiguration(
                kube_config=kube_config).as_pod())
        pod_mutation_hook(pod)
        api_client = ApiClient()
        date_string = pod_generator.datetime_to_label_safe_datestring(
            execution_date)
        yaml_file_name = f"{args.dag_id}_{ti.task_id}_{date_string}.yml"
        os.makedirs(os.path.dirname(yaml_output_path +
                                    "/airflow_yaml_output/"),
                    exist_ok=True)
        with open(yaml_output_path + "/airflow_yaml_output/" + yaml_file_name,
                  "w") as output:
            sanitized_pod = api_client.sanitize_for_serialization(pod)
            output.write(yaml.dump(sanitized_pod))
    print(
        f"YAML output can be found at {yaml_output_path}/airflow_yaml_output/")
    def test_add_custom_label(self):
        from kubernetes.client import models as k8s

        pod = PodGenerator.construct_pod(
            namespace="test",
            worker_uuid="test",
            pod_id="test",
            dag_id="test",
            task_id="test",
            try_number=1,
            date="23-07-2020",
            command="test",
            kube_executor_config=None,
            worker_config=k8s.V1Pod(metadata=k8s.V1ObjectMeta(
                labels={"airflow-test": "airflow-task-pod"},
                annotations={"my.annotation": "foo"})))
        self.assertIn("airflow-test", pod.metadata.labels)
        self.assertIn("my.annotation", pod.metadata.annotations)
Пример #12
0
    def run_next(self, next_job):
        """
        The run_next command will check the task_queue for any un-run jobs.
        It will then create a unique job-id, launch that job in the cluster,
        and store relevant info in the current_jobs map so we can track the job's
        status
        """
        self.log.info('Kubernetes job is %s', str(next_job))
        key, command, kube_executor_config = next_job
        dag_id, task_id, execution_date, try_number = key

        if command[0:2] != ["airflow", "run"]:
            raise ValueError('The command must start with ["airflow", "run"].')

        pod = PodGenerator.construct_pod(
            namespace=self.namespace,
            worker_uuid=self.worker_uuid,
            pod_id=self._create_pod_id(dag_id, task_id),
            dag_id=pod_generator.make_safe_label_value(dag_id),
            task_id=pod_generator.make_safe_label_value(task_id),
            try_number=try_number,
            kube_image=self.kube_config.kube_image,
            date=execution_date,
            command=command,
            pod_override_object=kube_executor_config,
            base_worker_pod=self.worker_configuration_pod)

        sanitized_pod = self.launcher._client.api_client.sanitize_for_serialization(
            pod)
        json_pod = json.dumps(sanitized_pod, indent=2)

        self.log.debug('Pod Creation Request before mutation: \n%s', json_pod)

        # Reconcile the pod generated by the Operator and the Pod
        # generated by the .cfg file
        self.log.debug("Kubernetes running for command %s", command)
        self.log.debug("Kubernetes launching image %s",
                       pod.spec.containers[0].image)

        # the watcher will monitor pods, so we do not block.
        self.launcher.run_pod_async(
            pod, **self.kube_config.kube_client_request_args)
        self.log.debug("Kubernetes Job created!")
    def test_construct_pod_with_mutation(self, mock_uuid):
        mock_uuid.return_value = self.static_uuid
        worker_config = k8s.V1Pod(
            metadata=k8s.V1ObjectMeta(name='gets-overridden-by-dynamic-args',
                                      annotations={'should': 'stay'}),
            spec=k8s.V1PodSpec(containers=[
                k8s.V1Container(name='doesnt-override',
                                resources=k8s.V1ResourceRequirements(
                                    limits={
                                        'cpu': '1m',
                                        'memory': '1G'
                                    }),
                                security_context=k8s.V1SecurityContext(
                                    run_as_user=1))
            ]))
        executor_config = k8s.V1Pod(spec=k8s.V1PodSpec(containers=[
            k8s.V1Container(name='doesnt-override-either',
                            resources=k8s.V1ResourceRequirements(limits={
                                'cpu': '2m',
                                'memory': '2G'
                            }))
        ]))

        result = PodGenerator.construct_pod(
            'dag_id',
            'task_id',
            'pod_id',
            3,
            'date',
            ['command'],
            executor_config,
            worker_config,
            'namespace',
            'uuid',
        )
        sanitized_result = self.k8s_client.sanitize_for_serialization(result)

        self.metadata.update({'annotations': {'should': 'stay'}})

        self.assertEqual(
            {
                'apiVersion': 'v1',
                'kind': 'Pod',
                'metadata': self.metadata,
                'spec': {
                    'containers': [{
                        'args': [],
                        'command': ['command'],
                        'env': [],
                        'envFrom': [],
                        'name': 'base',
                        'ports': [],
                        'resources': {
                            'limits': {
                                'cpu': '2m',
                                'memory': '2G'
                            }
                        },
                        'volumeMounts': [],
                        'securityContext': {
                            'runAsUser': 1
                        }
                    }],
                    'hostNetwork':
                    False,
                    'imagePullSecrets': [],
                    'volumes': []
                }
            }, sanitized_result)