def _extract_volumes(volumes):
    result = []
    volumes = volumes or []  # type: List[Union[k8s.V1Volume, dict]]
    for volume in volumes:
        if isinstance(volume, k8s.V1Volume):
            # Serialize the k8s model to a plain dict, then wrap it.
            volume = api_client.sanitize_for_serialization(volume)
            volume = Volume(name=volume.get("name"), configs=volume)
        if not isinstance(volume, Volume):
            # Plain dicts are wrapped in the backcompat Volume as well.
            volume = Volume(name=volume.get("name"), configs=volume)
        result.append(volume)
    return result
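A minimal usage sketch (illustrative only, not part of the example above): the helper accepts k8s.V1Volume objects, plain dicts, and backcompat Volume objects interchangeably and normalizes all of them to Volume. The Volume and api_client names are assumed to be the same objects the helper itself uses.

from kubernetes.client import models as k8s

mixed = [
    k8s.V1Volume(name="data", empty_dir=k8s.V1EmptyDirVolumeSource()),
    {"name": "cfg", "configMap": {"name": "my-config"}},
    Volume(name="scratch", configs={"emptyDir": {}}),
]
normalized = _extract_volumes(mixed)
# every element is now a backcompat Volume wrapping a serialized config dict
assert all(isinstance(v, Volume) for v in normalized)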
    def test_volume_mount():
        with mock.patch.object(PodLauncher, 'log') as mock_logger:
            volume_mount = VolumeMount('test-volume',
                                       mount_path='/root/mount_file',
                                       sub_path=None,
                                       read_only=True)

            volume_config = {
                'persistentVolumeClaim': {
                    'claimName': 'test-volume'
                }
            }
            volume = Volume(name='test-volume', configs=volume_config)
            k = KubernetesPodOperator(
                namespace='default',
                image="ubuntu:16.04",
                cmds=["bash", "-cx"],
                arguments=["cat /root/mount_file/test.txt"],
                labels={"foo": "bar"},
                volume_mounts=[volume_mount],
                volumes=[volume],
                name="test",
                task_id="task"
            )
            k.execute(None)
            mock_logger.info.assert_any_call(b"retrieved from mount\n")
    def test_to_k8s_object(self):
        volume_config = {'persistentVolumeClaim': {'claimName': 'test-volume'}}
        volume = Volume(name='test-volume', configs=volume_config)
        expected_volume = k8s.V1Volume(
            name="test-volume",
            persistent_volume_claim={"claimName": "test-volume"})
        result = volume.to_k8s_client_obj()
        self.assertEqual(result, expected_volume)
    def test_volume_mount(self):
        with patch.object(PodManager, 'log') as mock_logger:
            volume_mount = VolumeMount('test-volume',
                                       mount_path='/tmp/test_volume',
                                       sub_path=None,
                                       read_only=False)

            volume_config = {
                'persistentVolumeClaim': {
                    'claimName': 'test-volume'
                }
            }
            volume = Volume(name='test-volume', configs=volume_config)
            args = [
                "echo \"retrieved from mount\" > /tmp/test_volume/test.txt "
                "&& cat /tmp/test_volume/test.txt"
            ]
            k = KubernetesPodOperator(
                namespace='default',
                image="ubuntu:16.04",
                cmds=["bash", "-cx"],
                arguments=args,
                labels={"foo": "bar"},
                volume_mounts=[volume_mount],
                volumes=[volume],
                is_delete_operator_pod=False,
                name="test",
                task_id="task",
                in_cluster=False,
                do_xcom_push=False,
            )
            context = create_context(k)
            k.execute(context=context)
            mock_logger.info.assert_any_call('retrieved from mount')
            actual_pod = self.api_client.sanitize_for_serialization(k.pod)
            expected_pod = copy(self.expected_pod)
            expected_pod['spec']['containers'][0]['args'] = args
            expected_pod['spec']['containers'][0]['volumeMounts'] = [{
                'name': 'test-volume',
                'mountPath': '/tmp/test_volume',
                'readOnly': False
            }]
            expected_pod['spec']['volumes'] = [{
                'name': 'test-volume',
                'persistentVolumeClaim': {
                    'claimName': 'test-volume'
                }
            }]
            expected_pod['metadata']['labels']['already_checked'] = 'True'
            assert expected_pod == actual_pod
    def test_init_container(self):
        # GIVEN
        volume_mounts = [
            k8s.V1VolumeMount(mount_path='/etc/foo', name='test-volume', sub_path=None, read_only=True)
        ]

        init_environments = [
            k8s.V1EnvVar(name='key1', value='value1'),
            k8s.V1EnvVar(name='key2', value='value2'),
        ]

        init_container = k8s.V1Container(
            name="init-container",
            image="ubuntu:16.04",
            env=init_environments,
            volume_mounts=volume_mounts,
            command=["bash", "-cx"],
            args=["echo 10"],
        )

        volume_config = {'persistentVolumeClaim': {'claimName': 'test-volume'}}
        volume = Volume(name='test-volume', configs=volume_config)

        expected_init_container = {
            'name': 'init-container',
            'image': 'ubuntu:16.04',
            'command': ['bash', '-cx'],
            'args': ['echo 10'],
            'env': [{'name': 'key1', 'value': 'value1'}, {'name': 'key2', 'value': 'value2'}],
            'volumeMounts': [{'mountPath': '/etc/foo', 'name': 'test-volume', 'readOnly': True}],
        }

        k = KubernetesPodOperator(
            namespace='default',
            image="ubuntu:16.04",
            cmds=["bash", "-cx"],
            arguments=["echo 10"],
            labels={"foo": "bar"},
            name="test",
            task_id="task",
            volumes=[volume],
            init_containers=[init_container],
            in_cluster=False,
            do_xcom_push=False,
        )
        context = create_context(k)
        k.execute(context)
        actual_pod = self.api_client.sanitize_for_serialization(k.pod)
        self.expected_pod['spec']['initContainers'] = [expected_init_container]
        self.expected_pod['spec']['volumes'] = [
            {'name': 'test-volume', 'persistentVolumeClaim': {'claimName': 'test-volume'}}
        ]
        assert self.expected_pod == actual_pod
Example #6
    def _volumes(self):
        volumes_config = self.liminal_config.get('volumes', [])
        volumes = []
        for volume_config in volumes_config:
            name = volume_config['volume']
            volume = Volume(name=name,
                            configs={
                                'persistentVolumeClaim': {
                                    'claimName': f"{name}-pvc"
                                }
                            })
            volumes.append(volume)
        return volumes
Example #7
    def _volumes(self):
        volumes_config = self.liminal_config.get('volumes', [])
        volumes = []
        for volume_config in volumes_config:
            name = volume_config['volume']
            claim_name = volume_config.get('claim_name')
            if not claim_name and 'local' in volume_config:
                claim_name = f'{name}-pvc'
            volume = Volume(
                name=name,
                configs={'persistentVolumeClaim': {
                    'claimName': claim_name
                }})
            volumes.append(volume)
        return volumes
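For illustration, a hypothetical liminal_config fragment that this variant of _volumes() could consume is shown below; the shape of the 'local' sub-dict is an assumption, since the method only checks for the key's presence.

liminal_config = {
    'volumes': [
        # explicit claim name is used as-is
        {'volume': 'shared-data', 'claim_name': 'shared-data-claim'},
        # local volume with no claim_name falls back to "<name>-pvc"
        {'volume': 'cache', 'local': {'path': '/tmp/cache'}},
    ]
}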
Example #8
    def test_volume_mount(self):
        with mock.patch.object(PodLauncher, 'log') as mock_logger:
            volume_mount = VolumeMount('test-volume',
                                       mount_path='/root/mount_file',
                                       sub_path=None,
                                       read_only=True)

            volume_config = {
                'persistentVolumeClaim': {
                    'claimName': 'test-volume'
                }
            }
            volume = Volume(name='test-volume', configs=volume_config)
            args = ["cat /root/mount_file/test.txt"]
            k = KubernetesPodOperator(
                namespace='default',
                image="ubuntu:16.04",
                cmds=["bash", "-cx"],
                arguments=args,
                labels={"foo": "bar"},
                volume_mounts=[volume_mount],
                volumes=[volume],
                name="test",
                task_id="task",
                in_cluster=False,
                do_xcom_push=False,
            )
            context = self.create_context(k)
            k.execute(context=context)
            mock_logger.info.assert_any_call(b"retrieved from mount\n")
            actual_pod = self.api_client.sanitize_for_serialization(k.pod)
            self.expected_pod['spec']['containers'][0]['args'] = args
            self.expected_pod['spec']['containers'][0]['volumeMounts'] = [{
                'name': 'test-volume',
                'mountPath': '/root/mount_file',
                'readOnly': True
            }]
            self.expected_pod['spec']['volumes'] = [{
                'name': 'test-volume',
                'persistentVolumeClaim': {
                    'claimName': 'test-volume'
                }
            }]
            self.assertEqual(self.expected_pod, actual_pod)
def build_kubernetes_pod_exporter(dag, command, etl_cmd_string, output_file):
    '''
    Creates the export task using a KubernetesPodOperator.
    Parameters:
        dag - the parent dag
        command - stellar-etl command type (ex. export_ledgers, export_accounts)
        etl_cmd_string - a string of the fully formed command that includes all flags and arguments to be sent to the etl
        output_file - filename for the output file or folder
    Returns:
        the KubernetesPodOperator for the export task
    '''
    from airflow.kubernetes.volume import Volume
    from airflow.kubernetes.volume_mount import VolumeMount

    data_mount = VolumeMount(Variable.get('volume_name'), Variable.get("image_output_path"), '', False)
    volume_config = {
        'persistentVolumeClaim': {
            'claimName': Variable.get('volume_claim_name')
        }
    }
    data_volume = Volume(Variable.get('volume_name'), volume_config)

    cmd = ['bash']
    args = ['-c', f'{etl_cmd_string} && mkdir -p /airflow/xcom/ && echo \'{{"output_file":"{output_file}"}}\' >> /airflow/xcom/return.json']
    
    config_file_location = Variable.get('kube_config_location')
    in_cluster = False if config_file_location else True
    
    return KubernetesPodOperator(
        task_id=command + '_task',
        name=command + '_task',
        namespace=Variable.get('namespace'),
        image=Variable.get('image_name'),
        cmds=cmd,
        arguments=args,
        dag=dag,
        do_xcom_push=True,
        is_delete_operator_pod=True,
        in_cluster=in_cluster,
        config_file=config_file_location,
        volume_mounts=[data_mount],
        volumes=[data_volume],
        affinity=Variable.get('affinity', deserialize_json=True)
    )
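An illustrative call (not taken from the quoted DAG) is sketched below. It assumes the Airflow Variables read inside build_kubernetes_pod_exporter (volume_name, image_output_path, volume_claim_name, namespace, image_name, affinity, kube_config_location) are already defined; the etl command string and file name are placeholders.

export_ledgers_task = build_kubernetes_pod_exporter(
    dag=dag,
    command='export_ledgers',
    etl_cmd_string='stellar-etl export_ledgers -s 100 -e 200 -o /usr/etl/exported_ledgers.txt',
    output_file='exported_ledgers.txt',
)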

affinity = ONDEMAND_NODE_AFFINITY

s3_backup_volume_mount = VolumeMount(name="s3-backup-volume",
                                     mount_path=BACKUP_PATH,
                                     sub_path=None,
                                     read_only=False)

s3_backup_volume_config = {
    "persistentVolumeClaim": {
        "claimName": "s3-backup-volume"
    }
}

s3_backup_volume = Volume(name="s3-backup-volume",
                          configs=s3_backup_volume_config)

with dag:
    START = DummyOperator(task_id="start")

    # Wait for S3 Key
    S3_BACKUP_SENSE = S3KeySensor(
        task_id="s3-backup-sense",
        poke_interval=60 * 30,
        bucket_key=S3_KEY,
        aws_conn_id="aws_nci_db_backup",
    )

    # Download NCI db incremental backup from S3 and restore to RDS Aurora
    RESTORE_NCI_INCREMENTAL_SYNC = KubernetesPodOperator(
        namespace="processing",
Example #11
    def make_task(operator: str, task_params: Dict[str, Any]) -> BaseOperator:
        """
        Takes an operator and params and creates an instance of that operator.

        :returns: instance of operator object
        """
        try:
            # class is a Callable https://stackoverflow.com/a/34578836/3679900
            operator_obj: Callable[..., BaseOperator] = import_string(operator)
        except Exception as err:
            raise Exception(f"Failed to import operator: {operator}") from err
        try:
            if operator_obj in [PythonOperator, BranchPythonOperator]:
                if not task_params.get(
                        "python_callable_name") and not task_params.get(
                            "python_callable_file"):
                    raise Exception(
                        "Failed to create task. PythonOperator and BranchPythonOperator requires \
                        `python_callable_name` and `python_callable_file` parameters."
                    )
                task_params[
                    "python_callable"]: Callable = utils.get_python_callable(
                        task_params["python_callable_name"],
                        task_params["python_callable_file"],
                    )
                # remove dag-factory specific parameters
                # Airflow 2.0 doesn't allow these to be passed to operator
                del task_params["python_callable_name"]
                del task_params["python_callable_file"]

            # Check for the custom success and failure callables in SqlSensor. These are considered
            # optional, so no failures in case they aren't found. Note: there's no reason to
            # declare both a callable file and a lambda function for success/failure parameter.
            # If both are found, the object will not throw an error; instead the callable file
            # takes precedence over the lambda function.
            if operator_obj in [SqlSensor]:
                # Success checks
                if task_params.get("success_check_file") and task_params.get(
                        "success_check_name"):
                    task_params[
                        "success"]: Callable = utils.get_python_callable(
                            task_params["success_check_name"],
                            task_params["success_check_file"],
                        )
                    del task_params["success_check_name"]
                    del task_params["success_check_file"]
                elif task_params.get("success_check_lambda"):
                    task_params[
                        "success"]: Callable = utils.get_python_callable_lambda(
                            task_params["success_check_lambda"])
                    del task_params["success_check_lambda"]
                # Failure checks
                if task_params.get("failure_check_file") and task_params.get(
                        "failure_check_name"):
                    task_params[
                        "failure"]: Callable = utils.get_python_callable(
                            task_params["failure_check_name"],
                            task_params["failure_check_file"],
                        )
                    del task_params["failure_check_name"]
                    del task_params["failure_check_file"]
                elif task_params.get("failure_check_lambda"):
                    task_params[
                        "failure"]: Callable = utils.get_python_callable_lambda(
                            task_params["failure_check_lambda"])
                    del task_params["failure_check_lambda"]

            if operator_obj in [HttpSensor]:
                if not (task_params.get("response_check_name")
                        and task_params.get("response_check_file")
                        ) and not task_params.get("response_check_lambda"):
                    raise Exception(
                        "Failed to create task. HttpSensor requires \
                        `response_check_name` and `response_check_file` parameters \
                        or `response_check_lambda` parameter.")
                if task_params.get("response_check_file"):
                    task_params[
                        "response_check"]: Callable = utils.get_python_callable(
                            task_params["response_check_name"],
                            task_params["response_check_file"],
                        )
                    # remove dag-factory specific parameters
                    # Airflow 2.0 doesn't allow these to be passed to operator
                    del task_params["response_check_name"]
                    del task_params["response_check_file"]
                else:
                    task_params[
                        "response_check"]: Callable = utils.get_python_callable_lambda(
                            task_params["response_check_lambda"])
                    # remove dag-factory specific parameters
                    # Airflow 2.0 doesn't allow these to be passed to operator
                    del task_params["response_check_lambda"]

            # KubernetesPodOperator
            if operator_obj == KubernetesPodOperator:
                task_params["secrets"] = ([
                    Secret(**v) for v in task_params.get("secrets")
                ] if task_params.get("secrets") is not None else None)

                task_params["ports"] = ([
                    Port(**v) for v in task_params.get("ports")
                ] if task_params.get("ports") is not None else None)
                task_params["volume_mounts"] = ([
                    VolumeMount(**v) for v in task_params.get("volume_mounts")
                ] if task_params.get("volume_mounts") is not None else None)
                task_params["volumes"] = ([
                    Volume(**v) for v in task_params.get("volumes")
                ] if task_params.get("volumes") is not None else None)
                task_params["pod_runtime_info_envs"] = ([
                    PodRuntimeInfoEnv(**v)
                    for v in task_params.get("pod_runtime_info_envs")
                ] if task_params.get("pod_runtime_info_envs") is not None else
                                                        None)
                task_params["full_pod_spec"] = (
                    V1Pod(**task_params.get("full_pod_spec"))
                    if task_params.get("full_pod_spec") is not None else None)
                task_params["init_containers"] = ([
                    V1Container(**v)
                    for v in task_params.get("init_containers")
                ] if task_params.get("init_containers") is not None else None)

            if utils.check_dict_key(task_params, "execution_timeout_secs"):
                task_params["execution_timeout"]: timedelta = timedelta(
                    seconds=task_params["execution_timeout_secs"])
                del task_params["execution_timeout_secs"]

            if utils.check_dict_key(task_params, "sla_secs"):
                task_params["sla"]: timedelta = timedelta(
                    seconds=task_params["sla_secs"])
                del task_params["sla_secs"]

            if utils.check_dict_key(task_params, "execution_delta_secs"):
                task_params["execution_delta"]: timedelta = timedelta(
                    seconds=task_params["execution_delta_secs"])
                del task_params["execution_delta_secs"]

            if utils.check_dict_key(
                    task_params,
                    "execution_date_fn_name") and utils.check_dict_key(
                        task_params, "execution_date_fn_file"):
                task_params[
                    "execution_date_fn"]: Callable = utils.get_python_callable(
                        task_params["execution_date_fn_name"],
                        task_params["execution_date_fn_file"],
                    )
                del task_params["execution_date_fn_name"]
                del task_params["execution_date_fn_file"]

            # on_execute_callback is an Airflow 2.0 feature
            if utils.check_dict_key(
                    task_params, "on_execute_callback"
            ) and version.parse(AIRFLOW_VERSION) >= version.parse("2.0.0"):
                task_params["on_execute_callback"]: Callable = import_string(
                    task_params["on_execute_callback"])

            if utils.check_dict_key(task_params, "on_failure_callback"):
                task_params["on_failure_callback"]: Callable = import_string(
                    task_params["on_failure_callback"])

            if utils.check_dict_key(task_params, "on_success_callback"):
                task_params["on_success_callback"]: Callable = import_string(
                    task_params["on_success_callback"])

            if utils.check_dict_key(task_params, "on_retry_callback"):
                task_params["on_retry_callback"]: Callable = import_string(
                    task_params["on_retry_callback"])

            # use variables as arguments on operator
            if utils.check_dict_key(task_params, "variables_as_arguments"):
                variables: List[Dict[str, str]] = task_params.get(
                    "variables_as_arguments")
                for variable in variables:
                    if Variable.get(variable["variable"],
                                    default_var=None) is not None:
                        task_params[variable["attribute"]] = Variable.get(
                            variable["variable"], default_var=None)
                del task_params["variables_as_arguments"]

            task: BaseOperator = operator_obj(**task_params)
        except Exception as err:
            raise Exception(f"Failed to create {operator_obj} task") from err
        return task
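For reference, a hypothetical task_params dict of the shape this make_task() expects for a KubernetesPodOperator is sketched below (all field values are placeholders). The plain dicts under secrets, volumes and volume_mounts are expanded into Secret, Volume and VolumeMount objects, and execution_timeout_secs is converted to a timedelta, before the operator is instantiated.

task_params = {
    "task_id": "etl_pod",
    "namespace": "default",
    "image": "ubuntu:16.04",
    "cmds": ["bash", "-cx"],
    "arguments": ["echo hello"],
    "secrets": [{"deploy_type": "env", "deploy_target": "SQL_CONN",
                 "secret": "airflow-secrets", "key": "sql_alchemy_conn"}],
    "volumes": [{"name": "test-volume",
                 "configs": {"persistentVolumeClaim": {"claimName": "test-volume"}}}],
    "volume_mounts": [{"name": "test-volume", "mount_path": "/root/mount_file",
                       "sub_path": None, "read_only": True}],
    "execution_timeout_secs": 300,
}
task = make_task(
    "airflow.providers.cncf.kubernetes.operators.kubernetes_pod.KubernetesPodOperator",
    task_params,
)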
OWS_SECRETS = [
    Secret("env", "DB_USERNAME", SECRET_OWS_WRITER_NAME, "postgres-username"),
    Secret("env", "DB_PASSWORD", SECRET_OWS_WRITER_NAME, "postgres-password"),
]

# MOUNT OWS_CFG via init_container
# for main container mount
ows_cfg_mount = VolumeMount("ows-config-volume",
                            mount_path=OWS_CFG_MOUNT_PATH,
                            sub_path=None,
                            read_only=False)

ows_cfg_volume_config = {}

ows_cfg_volume = Volume(name="ows-config-volume",
                        configs=ows_cfg_volume_config)

# for init container mount
cfg_image_mount = k8s.V1VolumeMount(
    mount_path=OWS_CFG_MOUNT_PATH,
    name="ows-config-volume",
    sub_path=None,
    read_only=False,
)

config_container = k8s.V1Container(
    image=OWS_CONFIG_IMAGE,
    command=["cp"],
    args=["-r", OWS_CFG_IMAGEPATH, OWS_CFG_FOLDER_PATH],
    volume_mounts=[cfg_image_mount],
    name="mount-ows-config",
Example #13
    def test_to_v1_pod(self, mock_uuid):
        from airflow.contrib.kubernetes.pod import Pod as DeprecatedPod
        from airflow.kubernetes.volume import Volume
        from airflow.kubernetes.volume_mount import VolumeMount
        from airflow.kubernetes.secret import Secret
        from airflow.kubernetes.pod import Resources
        import uuid
        static_uuid = uuid.UUID('cf4a56d2-8101-4217-b027-2af6216feb48')
        mock_uuid.return_value = static_uuid

        pod = DeprecatedPod(
            image="foo",
            name="bar",
            namespace="baz",
            image_pull_policy="Never",
            envs={"test_key": "test_value"},
            cmds=["airflow"],
            resources=Resources(request_memory="1G",
                                request_cpu="100Mi",
                                limit_gpu="100G"),
            init_containers=k8s.V1Container(name="test-container",
                                            volume_mounts=k8s.V1VolumeMount(
                                                mount_path="/foo/bar",
                                                name="init-volume-secret")),
            volumes=[
                Volume(name="foo", configs={}), {
                    "name": "bar",
                    'secret': {
                        'secretName': 'volume-secret'
                    }
                }
            ],
            secrets=[
                Secret("volume", None, "init-volume-secret"),
                Secret('env', "AIRFLOW_SECRET", 'secret_name',
                       "airflow_config"),
                Secret("volume", "/opt/airflow", "volume-secret", "secret-key")
            ],
            volume_mounts=[
                VolumeMount(name="foo",
                            mount_path="/mnt",
                            sub_path="/",
                            read_only=True)
            ])

        k8s_client = ApiClient()

        result = pod.to_v1_kubernetes_pod()
        result = k8s_client.sanitize_for_serialization(result)

        expected = \
            {'metadata': {'labels': {}, 'name': 'bar', 'namespace': 'baz'},
             'spec': {'affinity': {},
                      'containers': [{'args': [],
                                      'command': ['airflow'],
                                      'env': [{'name': 'test_key', 'value': 'test_value'},
                                              {'name': 'AIRFLOW_SECRET',
                                               'valueFrom': {'secretKeyRef': {'key': 'airflow_config',
                                                                              'name': 'secret_name'}}}],
                                      'image': 'foo',
                                      'imagePullPolicy': 'Never',
                                      'name': 'base',
                                      'resources': {'limits': {'nvidia.com/gpu': '100G'},
                                                    'requests': {'cpu': '100Mi',
                                                                 'memory': '1G'}},
                                      'volumeMounts': [{'mountPath': '/mnt',
                                                        'name': 'foo',
                                                        'readOnly': True,
                                                        'subPath': '/'},
                                                       {'mountPath': '/opt/airflow',
                                                       'name': 'secretvol' + str(static_uuid),
                                                        'readOnly': True}]}],
                      'hostNetwork': False,
                      'initContainers': {'name': 'test-container',
                                         'volumeMounts': {'mountPath': '/foo/bar',
                                                          'name': 'init-volume-secret'}},
                      'securityContext': {},
                      'tolerations': [],
                      'volumes': [{'name': 'foo'},
                                  {'name': 'bar',
                                   'secret': {'secretName': 'volume-secret'}},
                                  {'name': 'secretvolcf4a56d2-8101-4217-b027-2af6216feb48',
                                   'secret': {'secretName': 'init-volume-secret'}},
                                  {'name': 'secretvol' + str(static_uuid),
                                   'secret': {'secretName': 'volume-secret'}}
                                  ]}}
        self.maxDiff = None
        self.assertEqual(expected, result)
Example #14
    'retries': 0,
    'retry_delay': timedelta(minutes=1)
}

volume_mount_nb = VolumeMount(name='nb-data',
                            mount_path='/opt/airflow/nb',
                            sub_path=None,
                            read_only=False)

volume_config = {
    'persistentVolumeClaim': {
        'claimName': 'airflow-nfs-pvc-nb-01'
    }
}
volume_nb = Volume(name='nb-data', configs=volume_config)

dag = DAG(
    'kubernetes_pod_operator_03_with_sleep', default_args=default_args, schedule_interval=timedelta(minutes=10))

start = DummyOperator(task_id='start', dag=dag)

passing = KubernetesPodOperator(namespace='default',
                          image="python:3.6",
                          cmds=["python"],
                          arguments=["/opt/airflow/nb/sleep.py"],
                          labels={"foo": "bar"},
                          name="passing-test",
                          task_id="passing-task",
                          get_logs=True,
                          image_pull_policy="IfNotPresent",
Example #15
    def make_task(operator: str, task_params: Dict[str, Any],
                  af_vars: Dict[str, Any]) -> BaseOperator:
        """
        Takes an operator and params and creates an instance of that operator.

        :returns: instance of operator object
        """
        try:
            # class is a Callable https://stackoverflow.com/a/34578836/3679900
            operator_obj: Callable[..., BaseOperator] = import_string(operator)
        except Exception as err:
            raise Exception(f"Failed to import operator: {operator}") from err
        try:
            if operator_obj in [
                    PythonOperator, BranchPythonOperator, PythonSensor
            ]:
                if (not task_params.get("python_callable")
                        and not task_params.get("python_callable_name")
                        and not task_params.get("python_callable_file")):
                    # pylint: disable=line-too-long
                    raise Exception(
                        "Failed to create task. PythonOperator, BranchPythonOperator and PythonSensor requires \
                        `python_callable_name` and `python_callable_file` "
                        "parameters.\nOptionally you can load python_callable "
                        "from a file. with the special pyyaml notation:\n"
                        "  python_callable_file: !!python/name:my_module.my_func"
                    )
                if not task_params.get("python_callable"):
                    task_params[
                        "python_callable"]: Callable = utils.get_python_callable(
                            task_params["python_callable_name"],
                            task_params["python_callable_file"],
                        )
                    # remove dag-factory specific parameters
                    # Airflow 2.0 doesn't allow these to be passed to operator
                    del task_params["python_callable_name"]
                    del task_params["python_callable_file"]

            # Check for the custom success and failure callables in SqlSensor. These are considered
            # optional, so no failures in case they aren't found. Note: there's no reason to
            # declare both a callable file and a lambda function for success/failure parameter.
            # If both are found, the object will not throw an error; instead the callable file
            # takes precedence over the lambda function.
            if operator_obj in [SqlSensor]:
                # Success checks
                if task_params.get("success_check_file") and task_params.get(
                        "success_check_name"):
                    task_params[
                        "success"]: Callable = utils.get_python_callable(
                            task_params["success_check_name"],
                            task_params["success_check_file"],
                        )
                    del task_params["success_check_name"]
                    del task_params["success_check_file"]
                elif task_params.get("success_check_lambda"):
                    task_params[
                        "success"]: Callable = utils.get_python_callable_lambda(
                            task_params["success_check_lambda"])
                    del task_params["success_check_lambda"]
                # Failure checks
                if task_params.get("failure_check_file") and task_params.get(
                        "failure_check_name"):
                    task_params[
                        "failure"]: Callable = utils.get_python_callable(
                            task_params["failure_check_name"],
                            task_params["failure_check_file"],
                        )
                    del task_params["failure_check_name"]
                    del task_params["failure_check_file"]
                elif task_params.get("failure_check_lambda"):
                    task_params[
                        "failure"]: Callable = utils.get_python_callable_lambda(
                            task_params["failure_check_lambda"])
                    del task_params["failure_check_lambda"]

            if operator_obj in [HttpSensor]:
                if not (task_params.get("response_check_name")
                        and task_params.get("response_check_file")
                        ) and not task_params.get("response_check_lambda"):
                    raise Exception(
                        "Failed to create task. HttpSensor requires \
                        `response_check_name` and `response_check_file` parameters \
                        or `response_check_lambda` parameter.")
                if task_params.get("response_check_file"):
                    task_params[
                        "response_check"]: Callable = utils.get_python_callable(
                            task_params["response_check_name"],
                            task_params["response_check_file"],
                        )
                    # remove dag-factory specific parameters
                    # Airflow 2.0 doesn't allow these to be passed to operator
                    del task_params["response_check_name"]
                    del task_params["response_check_file"]
                else:
                    task_params[
                        "response_check"]: Callable = utils.get_python_callable_lambda(
                            task_params["response_check_lambda"])
                    # remove dag-factory specific parameters
                    # Airflow 2.0 doesn't allow these to be passed to operator
                    del task_params["response_check_lambda"]

            # KubernetesPodOperator
            if operator_obj == KubernetesPodOperator:
                task_params["secrets"] = ([
                    Secret(**v) for v in task_params.get("secrets")
                ] if task_params.get("secrets") is not None else None)

                task_params["ports"] = ([
                    Port(**v) for v in task_params.get("ports")
                ] if task_params.get("ports") is not None else None)
                task_params["volume_mounts"] = ([
                    VolumeMount(**v) for v in task_params.get("volume_mounts")
                ] if task_params.get("volume_mounts") is not None else None)
                task_params["volumes"] = ([
                    Volume(**v) for v in task_params.get("volumes")
                ] if task_params.get("volumes") is not None else None)
                task_params["pod_runtime_info_envs"] = ([
                    PodRuntimeInfoEnv(**v)
                    for v in task_params.get("pod_runtime_info_envs")
                ] if task_params.get("pod_runtime_info_envs") is not None else
                                                        None)
                task_params["full_pod_spec"] = (
                    V1Pod(**task_params.get("full_pod_spec"))
                    if task_params.get("full_pod_spec") is not None else None)
                task_params["init_containers"] = ([
                    V1Container(**v)
                    for v in task_params.get("init_containers")
                ] if task_params.get("init_containers") is not None else None)
            if operator_obj == DockerOperator:
                if task_params.get("environment") is not None:
                    task_params["environment"] = {
                        k: os.environ.get(v, v)
                        for k, v in task_params["environment"].items()
                    }

            if operator_obj == EcsOperator:
                for c in task_params["overrides"]["containerOverrides"]:
                    if c.get('environment') is not None:
                        for env in c['environment']:
                            env['value'] = os.environ.get(
                                env['value'], env['value'])

                if 'ECS_SECURITY_GROUPS' in af_vars and 'network_configuration' in task_params:
                    task_params["network_configuration"]["awsvpcConfiguration"]['securityGroups'] \
                        = af_vars['ECS_SECURITY_GROUPS']

                if 'ECS_SUBNETS' in af_vars and 'network_configuration' in task_params:
                    task_params['network_configuration'][
                        "awsvpcConfiguration"]["subnets"] = af_vars[
                            "ECS_SUBNETS"]

                if af_vars.get('ECS_CLUSTER'):
                    task_params['cluster'] = af_vars["ECS_CLUSTER"]
                    task_params['task_definition'] = (
                        af_vars.get('ECS_CLUSTER') + '_' +
                        task_params['task_definition']).lower()

                    task_params['awslogs_group'] = \
                        task_params['awslogs_group'] + '/' + af_vars.get('ECS_CLUSTER').lower()

            if utils.check_dict_key(task_params, "execution_timeout_secs"):
                task_params["execution_timeout"]: timedelta = timedelta(
                    seconds=task_params["execution_timeout_secs"])
                del task_params["execution_timeout_secs"]

            if utils.check_dict_key(task_params, "sla_secs"):
                task_params["sla"]: timedelta = timedelta(
                    seconds=task_params["sla_secs"])
                del task_params["sla_secs"]

            if utils.check_dict_key(task_params, "execution_delta_secs"):
                task_params["execution_delta"]: timedelta = timedelta(
                    seconds=task_params["execution_delta_secs"])
                del task_params["execution_delta_secs"]

            if utils.check_dict_key(
                    task_params,
                    "execution_date_fn_name") and utils.check_dict_key(
                        task_params, "execution_date_fn_file"):
                task_params[
                    "execution_date_fn"]: Callable = utils.get_python_callable(
                        task_params["execution_date_fn_name"],
                        task_params["execution_date_fn_file"],
                    )
                del task_params["execution_date_fn_name"]
                del task_params["execution_date_fn_file"]

            # on_execute_callback is an Airflow 2.0 feature
            if utils.check_dict_key(
                    task_params, "on_execute_callback"
            ) and version.parse(AIRFLOW_VERSION) >= version.parse("2.0.0"):
                task_params["on_execute_callback"]: Callable = import_string(
                    task_params["on_execute_callback"])

            if utils.check_dict_key(task_params, "on_failure_callback"):
                task_params["on_failure_callback"]: Callable = import_string(
                    task_params["on_failure_callback"])

            if utils.check_dict_key(task_params, "on_success_callback"):
                task_params["on_success_callback"]: Callable = import_string(
                    task_params["on_success_callback"])

            if utils.check_dict_key(task_params, "on_retry_callback"):
                task_params["on_retry_callback"]: Callable = import_string(
                    task_params["on_retry_callback"])

            # use variables as arguments on operator
            if utils.check_dict_key(task_params, "variables_as_arguments"):
                variables: List[Dict[str, str]] = task_params.get(
                    "variables_as_arguments")
                for variable in variables:
                    if Variable.get(variable["variable"],
                                    default_var=None) is not None:
                        task_params[variable["attribute"]] = Variable.get(
                            variable["variable"], default_var=None)
                del task_params["variables_as_arguments"]

            # use variables as arguments on operator
            if utils.check_dict_key(task_params, "af_vars_as_arguments"):
                variables: List[Dict[str, str]] = task_params.get(
                    "af_vars_as_arguments")
                for variable in variables:
                    if af_vars.get(variable["variable"], None) is not None:
                        task_params[variable["attribute"]] = af_vars.get(
                            variable["variable"], None)
                del task_params["af_vars_as_arguments"]

            task: BaseOperator = operator_obj(**task_params)
        except Exception as err:
            raise Exception(f"Failed to create {operator_obj} task") from err
        return task
Example #16
    def make_task(operator: str, task_params: Dict[str, Any]) -> BaseOperator:
        """
        Takes an operator and params and creates an instance of that operator.

        :returns: instance of operator object
        """
        try:
            # class is a Callable https://stackoverflow.com/a/34578836/3679900
            operator_obj: Callable[..., BaseOperator] = import_string(operator)
        except Exception as err:
            raise f"Failed to import operator: {operator}" from err
        try:
            if operator_obj == PythonOperator:
                if not task_params.get(
                        "python_callable_name") and not task_params.get(
                            "python_callable_file"):
                    raise Exception(
                        "Failed to create task. PythonOperator requires `python_callable_name` \
                        and `python_callable_file` parameters.")
                task_params[
                    "python_callable"]: Callable = utils.get_python_callable(
                        task_params["python_callable_name"],
                        task_params["python_callable_file"],
                    )

            # KubernetesPodOperator
            if operator_obj == KubernetesPodOperator:
                task_params["secrets"] = ([
                    Secret(**v) for v in task_params.get("secrets")
                ] if task_params.get("secrets") is not None else None)

                task_params["ports"] = ([
                    Port(**v) for v in task_params.get("ports")
                ] if task_params.get("ports") is not None else None)
                task_params["volume_mounts"] = ([
                    VolumeMount(**v) for v in task_params.get("volume_mounts")
                ] if task_params.get("volume_mounts") is not None else None)
                task_params["volumes"] = ([
                    Volume(**v) for v in task_params.get("volumes")
                ] if task_params.get("volumes") is not None else None)
                task_params["pod_runtime_info_envs"] = ([
                    PodRuntimeInfoEnv(**v)
                    for v in task_params.get("pod_runtime_info_envs")
                ] if task_params.get("pod_runtime_info_envs") is not None else
                                                        None)
                task_params["full_pod_spec"] = (
                    V1Pod(**task_params.get("full_pod_spec"))
                    if task_params.get("full_pod_spec") is not None else None)
                task_params["init_containers"] = ([
                    V1Container(**v)
                    for v in task_params.get("init_containers")
                ] if task_params.get("init_containers") is not None else None)

            if utils.check_dict_key(task_params, "execution_timeout_secs"):
                task_params["execution_timeout"]: timedelta = timedelta(
                    seconds=task_params["execution_timeout_secs"])
                del task_params["execution_timeout_secs"]

            # use variables as arguments on operator
            if utils.check_dict_key(task_params, "variables_as_arguments"):
                variables: List[Dict[str, str]] = task_params.get(
                    "variables_as_arguments")
                for variable in variables:
                    if Variable.get(variable["variable"],
                                    default_var=None) is not None:
                        task_params[variable["attribute"]] = Variable.get(
                            variable["variable"], default_var=None)
                del task_params["variables_as_arguments"]

            task: BaseOperator = operator_obj(**task_params)
        except Exception as err:
            raise f"Failed to create {operator_obj} task" from err
        return task
    "operator": "Equal",
    "value": "wagl",
    "effect": "NoSchedule"
}]

ancillary_volume_mount = VolumeMount(
    name="wagl-nrt-ancillary-volume",
    mount_path="/ancillary",
    sub_path=None,
    read_only=False,
)

ancillary_volume = Volume(
    name="wagl-nrt-ancillary-volume",
    configs={
        "persistentVolumeClaim": {
            "claimName": "wagl-nrt-ancillary-volume"
        }
    },
)


def setup_logging():
    """ """
    _LOG.setLevel(logging.INFO)
    handler = logging.StreamHandler()
    handler.setLevel(logging.INFO)

    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    handler.setFormatter(formatter)


volume_mount = VolumeMount(
    'persist-airflow-logs',
    mount_path='/opt/airflow/logs',
    sub_path=None,
    read_only=False
)

volume_config = {
    'persistentVolumeClaim': {'claimName': 'persist-airflow-logs'}
}

volume = Volume(
    name='persist-airflow-logs',
    configs=volume_config
)

default_args = {
    'owner': 'airflow',
}

# get current proxies in the db
batch_size = 15
batch_g_size = 5  # used to avoid the pod timeout error
CP_count = db_import.getChildPagesCount(ps_user="******", ps_pass="******", ps_host="172.22.114.65", ps_port="5432", ps_db="scrape_db")

# iterate to run
l_proxy_mods = ["scrape_child"]

with DAG(
    def test_convert_to_airflow_pod(self):
        input_pod = k8s.V1Pod(
            metadata=k8s.V1ObjectMeta(name="foo", namespace="bar"),
            spec=k8s.V1PodSpec(
                init_containers=[
                    k8s.V1Container(name="init-container",
                                    volume_mounts=[
                                        k8s.V1VolumeMount(mount_path="/tmp",
                                                          name="init-secret")
                                    ])
                ],
                containers=[
                    k8s.V1Container(
                        name="base",
                        command=["foo"],
                        image="myimage",
                        env=[
                            k8s.V1EnvVar(
                                name="AIRFLOW_SECRET",
                                value_from=k8s.V1EnvVarSource(
                                    secret_key_ref=k8s.V1SecretKeySelector(
                                        name="ai", key="secret_key")))
                        ],
                        ports=[
                            k8s.V1ContainerPort(
                                name="myport",
                                container_port=8080,
                            )
                        ],
                        volume_mounts=[
                            k8s.V1VolumeMount(name="myvolume",
                                              mount_path="/tmp/mount",
                                              read_only="True"),
                            k8s.V1VolumeMount(name='airflow-config',
                                              mount_path='/config',
                                              sub_path='airflow.cfg',
                                              read_only=True),
                            k8s.V1VolumeMount(name="airflow-secret",
                                              mount_path="/opt/mount",
                                              read_only=True)
                        ])
                ],
                security_context=k8s.V1PodSecurityContext(
                    run_as_user=0,
                    fs_group=0,
                ),
                volumes=[
                    k8s.V1Volume(name="myvolume"),
                    k8s.V1Volume(
                        name="airflow-config",
                        config_map=k8s.V1ConfigMap(data="airflow-data")),
                    k8s.V1Volume(name="airflow-secret",
                                 secret=k8s.V1SecretVolumeSource(
                                     secret_name="secret-name", )),
                    k8s.V1Volume(name="init-secret",
                                 secret=k8s.V1SecretVolumeSource(
                                     secret_name="init-secret", ))
                ]))
        result_pod = _convert_to_airflow_pod(input_pod)

        expected = Pod(
            name="foo",
            namespace="bar",
            envs={},
            init_containers=[{
                'name': 'init-container',
                'volumeMounts': [{
                    'mountPath': '/tmp',
                    'name': 'init-secret'
                }]
            }],
            cmds=["foo"],
            image="myimage",
            ports=[Port(name="myport", container_port=8080)],
            volume_mounts=[
                VolumeMount(name="myvolume",
                            mount_path="/tmp/mount",
                            sub_path=None,
                            read_only="True"),
                VolumeMount(name="airflow-config",
                            read_only=True,
                            mount_path="/config",
                            sub_path="airflow.cfg"),
                VolumeMount(name="airflow-secret",
                            mount_path="/opt/mount",
                            sub_path=None,
                            read_only=True)
            ],
            secrets=[Secret("env", "AIRFLOW_SECRET", "ai", "secret_key")],
            security_context={
                'fsGroup': 0,
                'runAsUser': 0
            },
            volumes=[
                Volume(name="myvolume", configs={'name': 'myvolume'}),
                Volume(name="airflow-config",
                       configs={
                           'configMap': {
                               'data': 'airflow-data'
                           },
                           'name': 'airflow-config'
                       }),
                Volume(name='airflow-secret',
                       configs={
                           'name': 'airflow-secret',
                           'secret': {
                               'secretName': 'secret-name'
                           }
                       }),
                Volume(name='init-secret',
                       configs={
                           'name': 'init-secret',
                           'secret': {
                               'secretName': 'init-secret'
                           }
                       })
            ],
        )
        expected_dict = expected.as_dict()
        result_dict = result_pod.as_dict()
        print(result_pod.volume_mounts)
        parsed_configs = self.pull_out_volumes(result_dict)
        result_dict['volumes'] = parsed_configs
        self.assertEqual(result_dict['secrets'], expected_dict['secrets'])
        self.assertDictEqual(expected_dict, result_dict)
Example #20
from airflow.operators.bash import BashOperator
from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import KubernetesPodOperator
from airflow.utils.dates import days_ago

# [START howto_operator_k8s_cluster_resources]
secret_file = Secret('volume', '/etc/sql_conn', 'airflow-secrets',
                     'sql_alchemy_conn')
secret_env = Secret('env', 'SQL_CONN', 'airflow-secrets', 'sql_alchemy_conn')
secret_all_keys = Secret('env', None, 'airflow-secrets-2')
volume_mount = VolumeMount('test-volume',
                           mount_path='/root/mount_file',
                           sub_path=None,
                           read_only=True)
configmaps = ['test-configmap-1', 'test-configmap-2']
volume_config = {'persistentVolumeClaim': {'claimName': 'test-volume'}}
volume = Volume(name='test-volume', configs=volume_config)
# [END howto_operator_k8s_cluster_resources]

port = Port('http', 80)

init_container_volume_mounts = [
    k8s.V1VolumeMount(mount_path='/etc/foo',
                      name='test-volume',
                      sub_path=None,
                      read_only=True)
]

init_environments = [
    k8s.V1EnvVar(name='key1', value='value1'),
    k8s.V1EnvVar(name='key2', value='value2')
]
Example #21
    dag=dag)

failed_callback = ExtendedHttpOperator(
    http_conn_id="apar_graphql",
    endpoint="graphql/",
    method="POST",
    headers={"Content-Type": "application/json"},
    data_fn=partial(get_job_status_update_callable, "FAILED"),
    task_id="failed_callback",
    dag=dag,
    trigger_rule="all_failed",
)

volume_config = {"persistentVolumeClaim": {"claimName": "pvc-data-name"}}

test_volume = Volume(name="pv-data-name", configs=volume_config)

test_volume_mount = VolumeMount("pv-data-name",
                                mount_path="/data",
                                sub_path=None,
                                read_only=False)

fastq_dump = KubernetesPodOperator(
    namespace="airflow",
    image="quay.io/biocontainers/sra-tools:2.10.0--pl526he1b5a44_0",
    cmds=[
        "fastq-dump", "--outdir", "data/fastq", "--gzip", "--skip-technical",
        "--readids", "--read-filter", "pass", "--dumpbase", "--split-3",
        "--clip", "SRR6982497"
    ],
    name="fastq_dump",