Example #1
    def test_get_secrets(self):
        # Test when secretRef is None and kube_secrets is not empty
        self.kube_config.kube_secrets = {
            'AWS_SECRET_KEY': 'airflow-secret=aws_secret_key',
            'POSTGRES_PASSWORD': 'airflow-secret=postgres_credentials'
        }
        self.kube_config.env_from_secret_ref = None
        worker_config = WorkerConfiguration(self.kube_config)
        secrets = worker_config._get_secrets()
        secrets.sort(key=lambda secret: secret.deploy_target)
        expected = [
            Secret('env', 'AWS_SECRET_KEY', 'airflow-secret',
                   'aws_secret_key'),
            Secret('env', 'POSTGRES_PASSWORD', 'airflow-secret',
                   'postgres_credentials')
        ]
        self.assertListEqual(expected, secrets)

        # Test when secret is not empty and kube_secrets is empty dict
        self.kube_config.kube_secrets = {}
        self.kube_config.env_from_secret_ref = 'secret_a,secret_b'
        worker_config = WorkerConfiguration(self.kube_config)
        secrets = worker_config._get_secrets()
        expected = [
            Secret('env', None, 'secret_a'),
            Secret('env', None, 'secret_b')
        ]
        self.assertListEqual(expected, secrets)
 def test_extract_volume_secrets(self):
     # Test when secrets is not empty
     secrets = [
         Secret('volume', 'KEY1', 's1', 'key-1'),
         Secret('env', 'KEY2', 's2'),
         Secret('volume', 'KEY3', 's3', 'key-2')
     ]
     pod = Pod('v3.14', {}, [], secrets=secrets)
     self.expected['spec']['containers'][0]['volumeMounts'] = [{
         'mountPath': 'KEY1',
         'name': 'secretvol0',
         'readOnly': True
     }, {
         'mountPath': 'KEY3',
         'name': 'secretvol1',
         'readOnly': True
     }]
     self.expected['spec']['volumes'] = [{
         'name': 'secretvol0',
         'secret': {
             'secretName': 's1'
         }
     }, {
         'name': 'secretvol1',
         'secret': {
             'secretName': 's3'
         }
     }]
     KubernetesRequestFactory.extract_volume_secrets(pod, self.input_req)
     self.assertEqual(self.input_req, self.expected)
    def test_extract_env_and_secrets(self):
        # Test when secrets and envs are not empty
        secrets = [
            Secret('env', None, 's1'),
            Secret('volume', 'KEY2', 's2', 'key-2'),
            Secret('env', None, 's3')
        ]
        envs = {
            'ENV1': 'val1',
            'ENV2': 'val2'
        }
        configmaps = ['configmap_a', 'configmap_b']
        pod_runtime_envs = [PodRuntimeInfoEnv("ENV3", "status.podIP")]
        pod = Pod(
            image='v3.14',
            envs=envs,
            cmds=[],
            secrets=secrets,
            configmaps=configmaps,
            pod_runtime_info_envs=pod_runtime_envs)
        self.expected['spec']['containers'][0]['env'] = [
            {'name': 'ENV1', 'value': 'val1'},
            {'name': 'ENV2', 'value': 'val2'},
            {
                'name': 'ENV3',
                'valueFrom': {
                    'fieldRef': {
                        'fieldPath': 'status.podIP'
                    }
                }
            }
        ]
        self.expected['spec']['containers'][0]['envFrom'] = [{
            'secretRef': {
                'name': 's1'
            }
        }, {
            'secretRef': {
                'name': 's3'
            }
        }, {
            'configMapRef': {
                'name': 'configmap_a'
            }
        }, {
            'configMapRef': {
                'name': 'configmap_b'
            }
        }]

        KubernetesRequestFactory.extract_env_and_secrets(pod, self.input_req)
        self.input_req['spec']['containers'][0]['env'].sort(key=lambda x: x['name'])
        self.assertEqual(self.input_req, self.expected)
Example #4
    def _get_secrets(self):
        """Defines any necessary secrets for the pod executor"""
        worker_secrets = []

        for env_var_name, obj_key_pair in six.iteritems(
                self.kube_config.kube_secrets):
            k8s_secret_obj, k8s_secret_key = obj_key_pair.split('=')
            worker_secrets.append(
                Secret('env', env_var_name, k8s_secret_obj, k8s_secret_key))

        if self.kube_config.env_from_secret_ref:
            for secret_ref in self.kube_config.env_from_secret_ref.split(','):
                worker_secrets.append(Secret('env', None, secret_ref))

        return worker_secrets
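
For reference, a minimal stand-in for the config object this helper reads (not Airflow's real KubeConfig); the attribute shapes mirror the test in Example #1:

class FakeKubeConfig:
    # ENV_VAR_NAME -> '<kubernetes secret name>=<key inside that secret>'
    kube_secrets = {
        'AWS_SECRET_KEY': 'airflow-secret=aws_secret_key',
    }
    # comma-separated secret names to be exposed whole via envFrom/secretRef
    env_from_secret_ref = 'secret_a,secret_b'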
Example #5
def build_secret_volume(secret, key="service-account.json"):
    return Secret(
        deploy_type="volume",
        deploy_target="/var/secrets/google",
        secret=secret,
        key=key,
    )
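
A minimal usage sketch (not from the original project): the secret name, image and task identifiers below are placeholders, while the mount path and key come from the helper's defaults above.

from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator

gcp_key_volume = build_secret_volume(secret="my-service-account")  # hypothetical Secret name

read_key = KubernetesPodOperator(
    task_id="read_service_account_key",
    name="read-service-account-key",
    namespace="default",
    image="google/cloud-sdk:slim",
    cmds=["cat", "/var/secrets/google/service-account.json"],
    secrets=[gcp_key_volume],
)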
Example #6
    def test_extract_env_and_secrets(self):
        # Test when secrets and envs are not empty
        secrets = [
            Secret('env', None, 's1'),
            Secret('volume', 'KEY2', 's2', 'key-2'),
            Secret('env', None, 's3')
        ]
        envs = {'ENV1': 'val1', 'ENV2': 'val2'}
        configmaps = ['configmap_a', 'configmap_b']
        pod = Pod('v3.14', envs, [], secrets=secrets, configmaps=configmaps)
        self.expected['spec']['containers'][0]['env'] = [
            {
                'name': 'ENV1',
                'value': 'val1'
            },
            {
                'name': 'ENV2',
                'value': 'val2'
            },
        ]
        self.expected['spec']['containers'][0]['envFrom'] = [{
            'secretRef': {
                'name': 's1'
            }
        }, {
            'secretRef': {
                'name': 's3'
            }
        }, {
            'configMapRef': {
                'name': 'configmap_a'
            }
        }, {
            'configMapRef': {
                'name': 'configmap_b'
            }
        }]

        KubernetesRequestFactory.extract_env_and_secrets(pod, self.input_req)
        self.input_req['spec']['containers'][0]['env'].sort(
            key=lambda x: x['name'])
        self.assertEqual(self.input_req, self.expected)
 def test_add_secret_to_env(self):
     secret = Secret('env', 'target', 'my-secret', 'KEY')
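      # 'env' deploy targets are upper-cased by Secret, hence 'TARGET' in the expectation below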
     secret_list = []
     self.expected = [{
         'name': 'TARGET',
         'valueFrom': {
             'secretKeyRef': {
                 'name': 'my-secret',
                 'key': 'KEY'
             }
         }
     }]
     KubernetesRequestFactory.add_secret_to_env(secret_list, secret)
     self.assertListEqual(secret_list, self.expected)
Example #8
    def get_secrets(self):
        """Defines any necessary secrets for the pod executor"""
        from airflow.contrib.kubernetes.secret import Secret

        result = []
        secrets = self.system_secrets + self.secrets
        for secret_data in secrets:
            result.append(
                Secret(
                    deploy_type=secret_data.get("type"),
                    deploy_target=secret_data.get("target"),
                    secret=secret_data.get("secret"),
                    key=secret_data.get("key"),
                ))

        return result
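
A sketch of the list-of-dicts shape this method consumes (field names taken from the .get() calls above; the values are placeholders), together with the equivalent direct construction:

from airflow.contrib.kubernetes.secret import Secret

secret_specs = [
    {"type": "env", "target": "DB_PASSWORD", "secret": "db-creds", "key": "password"},
    {"type": "volume", "target": "/var/secrets/google", "secret": "gcp-sa", "key": "sa.json"},
]

secrets = [
    Secret(
        deploy_type=spec.get("type"),
        deploy_target=spec.get("target"),
        secret=spec.get("secret"),
        key=spec.get("key"),
    )
    for spec in secret_specs
]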
Example #9
 def test_envs_from_secrets(self, client_mock, launcher_mock):
     # GIVEN
     from airflow.utils.state import State
     secrets = [Secret('env', None, "secret_name")]
     # WHEN
     k = KubernetesPodOperator(
         namespace='default',
         image="ubuntu:16.04",
         cmds=["bash", "-cx"],
         arguments=["echo 10"],
         secrets=secrets,
         labels={"foo": "bar"},
         name="test",
         task_id="task",
     )
     # THEN
     launcher_mock.return_value = (State.SUCCESS, None)
     k.execute(None)
     self.assertEqual(launcher_mock.call_args[0][0].secrets, secrets)
Example #10
def handle_container_environment_variables(
        env_vars: List[V1EnvVar],
) -> Tuple[Dict[str, str], List[Secret], List[str], List[PodRuntimeInfoEnv]]:
    secrets = []
    plain_env_vars = {}
    config_maps = []
    runtime_env_vars = []
    for env_var in env_vars or []:
        value_from: V1EnvVarSource = env_var.value_from
        if value_from:
            if value_from.resource_field_ref:
                # not handled for now
                continue
            if value_from.field_ref:
                field_ref: V1ObjectFieldSelector = value_from.field_ref
                runtime_env_vars.append(
                    PodRuntimeInfoEnv(
                        field_path=field_ref.field_path, name=env_var.name
                    )
                )
                continue

            if value_from.config_map_key_ref:
                key_ref: V1ConfigMapKeySelector = value_from.config_map_key_ref
                config_maps.append(key_ref.name)
                continue

            if value_from.secret_key_ref:
                key_ref: V1SecretKeySelector = value_from.secret_key_ref
                secrets.append(
                    Secret(
                        deploy_type="env",
                        deploy_target=env_var.name,
                        secret=key_ref.name,
                        key=key_ref.key,
                    )
                )
                continue

        plain_env_vars[env_var.name] = env_var.value

    return plain_env_vars, secrets, config_maps, runtime_env_vars
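
A small usage sketch, assuming the kubernetes Python client objects that the (truncated) imports above would provide; the env var and secret names are placeholders.

from kubernetes.client import (
    V1EnvVar,
    V1EnvVarSource,
    V1ObjectFieldSelector,
    V1SecretKeySelector,
)

env_vars = [
    V1EnvVar(name="LOG_LEVEL", value="info"),
    V1EnvVar(
        name="DB_PASSWORD",
        value_from=V1EnvVarSource(
            secret_key_ref=V1SecretKeySelector(name="db-creds", key="password")
        ),
    ),
    V1EnvVar(
        name="POD_IP",
        value_from=V1EnvVarSource(
            field_ref=V1ObjectFieldSelector(field_path="status.podIP")
        ),
    ),
]

plain, secrets, config_maps, runtime = handle_container_environment_variables(env_vars)
# plain == {"LOG_LEVEL": "info"}; secrets holds one 'env' Secret for DB_PASSWORD;
# config_maps is empty; runtime exposes status.podIP as POD_IP.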
Example #11
 def test_envs_from_secrets(self, mock_client, mock_launcher):
     # GIVEN
     from airflow.utils.state import State
     secret_ref = 'secret_name'
     secrets = [Secret('env', None, secret_ref)]
     # WHEN
     k = KubernetesPodOperator(
         namespace='default',
         image="ubuntu:16.04",
         cmds=["bash", "-cx"],
         arguments=["echo 10"],
         secrets=secrets,
         labels={"foo": "bar"},
         name="test",
         task_id="task",
         in_cluster=False,
         do_xcom_push=False,
     )
     # THEN
     mock_launcher.return_value = (State.SUCCESS, None)
     k.execute(None)
     self.assertEqual(mock_launcher.call_args[0][0].secrets, secrets)
from airflow import DAG
from airflow.utils.dates import days_ago
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
from airflow.contrib.kubernetes.secret import Secret

args = {'owner': 'Airflow', 'start_date': days_ago(2)}

dag = DAG(
    'citi-bike-pipeline',
    default_args=args,
    description='Citi Bike Pipeline',
    schedule_interval=None,
)

aws_access_key_id = Secret('env', 'AWS_ACCESS_KEY_ID', 'citi-bike-secrets',
                           'aws_access_key_id')
aws_secret_access_key = Secret('env', 'AWS_SECRET_ACCESS_KEY',
                               'citi-bike-secrets', 'aws_secret_access_key')

# TODO Change this to your ecr dag image
ecr_image = "<dag_ecr_image_url>"

create_cluster_task = KubernetesPodOperator(
    namespace='citi-bike',
    task_id="create_cluster",
    name="create_cluster_task",
    image=ecr_image,
    image_pull_policy='Always',
    arguments=["create_cluster"],
    do_xcom_push=True,
    secrets=[aws_access_key_id, aws_secret_access_key],
Example #13
from airflow.models.dag import DAG
from datetime import datetime, timedelta
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
from airflow.contrib.kubernetes.secret import Secret

aws_access_key_id = Secret('env', 'AWS_ACCESS_KEY_ID', 'airflow-aws',
                           'AWS_ACCESS_KEY_ID')
aws_secret_access_key = Secret('env', 'AWS_SECRET_ACCESS_KEY', 'airflow-aws',
                               'AWS_SECRET_ACCESS_KEY')
aws_account = Secret('env', 'AWS_ACCOUNT', 'airflow-aws', 'AWS_ACCOUNT')
spark_image = 'gcr.io/engineering-sandbox-228018/dev-pyspark:2.4.4'

default_args = {
    'owner': 'airflow',
    'namespace': 'ns-airflow',
    'depends_on_past': False,
    'get_logs': True,
    'start_date': datetime(2020, 1, 1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
Example #14
#!/usr/bin/env python
from os import environ
import json
from airflow import DAG
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
from airflow.contrib.kubernetes.secret import Secret
from airflow.utils.dates import days_ago
from airflow.utils.helpers import chain
from pipeline.testnatgateway.default_values import DEFAULT_VALUES

SECRET_ENV = Secret(deploy_type="env", deploy_target=None, secret="airflow-secret")
PIPILE_NAME = "testnatgateway"
WORKLOAD = int(environ["DAG_WORKLOAD"])

DRONE_LOG_DAG = DAG(
    PIPILE_NAME,
    default_args=DEFAULT_VALUES,
    schedule_interval=None,  # '@once',
    description="Insert UUID row into db",
    start_date=days_ago(1),
)

INDEX_FILE = "index.txt"
INDEX_PREFIX = f"airflow/{PIPILE_NAME}/indexes"
INDEX2 = KubernetesPodOperator(
    dag=DRONE_LOG_DAG,
    image=f"{environ['DOCKER_REGISTRY']}/pipeline/{PIPILE_NAME}:index",
    namespace="airflow",
    image_pull_policy="Always",
    name="index",
    arguments=[INDEX_PREFIX, INDEX_FILE],
Example #15
    "retries": 1,
    "retry_delay": timedelta(minutes=5),
    # 'queue': 'bash_queue',
    # 'pool': 'backfill',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
    "env_vars": {
        "AWS_DEFAULT_REGION": "ap-southeast-2",
        "DB_HOSTNAME": DB_HOSTNAME,
        # The run day's DB
        "DB_DATABASE": DB_DATABASE,
    },
    # Use K8S secrets to send DB Creds
    # Lift secrets into environment variables for datacube
    "secrets": [
        Secret("env", "DB_USERNAME", "replicator-db", "postgres-username"),
        # For Datacube to use
        Secret("env", "DB_PASSWORD", "replicator-db", "postgres-password"),
        # For psql to use
        Secret("env", "PGPASSWORD", "replicator-db", "postgres-password"),
    ],
}

# Point to Geoscience Australia / OpenDataCube Dockerhub
S3_TO_RDS_IMAGE = "geoscienceaustralia/s3-to-rds:latest"
EXPLORER_IMAGE = "opendatacube/explorer:2.1.9"

dag = DAG(
    "k8s_db_sync",
    doc_md=__doc__,
    default_args=DEFAULT_ARGS,
Example #16
    'email': '*****@*****.**',
    'email_on_failure': True,
    'email_on_retry': False,
    'retries': 0,
    'retry_delay': timedelta(seconds=10)
}

MAIN_DAG_ID = "ba_dag"
main_dag = DAG(MAIN_DAG_ID,
               default_args=default_args,
               schedule_interval='@weekly',
               catchup=True,
               max_active_runs=1)

service_account_secret_file = Secret('volume', '/etc/ga_service_account',
                                     'ga-service-account-json',
                                     'ga-service-account.json')
client_secret_secret_file = Secret('volume', '/etc/ga_client_secret',
                                   'ga-client-secret-json',
                                   'ga-client-secret.json')

script_root_path = '/tmp/scripts'
get_user_ids_r_script_path = "Services/ELT/DA/getUserIDs.R"
get_user_ids_r_script_whole_path = script_root_path + \
    "/" + get_user_ids_r_script_path
retrieve_user_session_activity_r_script_path = "Services/ELT/DA/getUserIDs.R"
retrieve_user_session_activity_r_script_whole_path = script_root_path + \
    "/" + retrieve_user_session_activity_r_script_path

volume_mount = VolumeMount('git-root-path',
                           mount_path=script_root_path,
Example #17
    "email_on_retry":
    False,
    "retries":
    1,
    "retry_delay":
    timedelta(minutes=5),
    "env_vars": {
        "AWS_DEFAULT_REGION": "ap-southeast-2",
        # TODO: Pass these via templated params in DAG Run
        "DB_HOSTNAME": "database-write.local",
        "DB_DATABASE": "ows-index",
    },
    # Use K8S secrets to send DB Creds
    # Lift secrets into environment variables for datacube
    "secrets": [
        Secret("env", "DB_USERNAME", "ows-db", "postgres-username"),
        Secret("env", "DB_PASSWORD", "ows-db", "postgres-password"),
    ],
}

INDEXER_IMAGE = "opendatacube/datacube-index:0.0.5"
OWS_IMAGE = "opendatacube/ows:1.8.0"
EXPLORER_IMAGE = "opendatacube/explorer:2.1.9"

dag = DAG("k8s_s3_orchestrate",
          doc_md=__doc__,
          default_args=DEFAULT_ARGS,
          schedule_interval=None,
          catchup=False,
          tags=["k8s"])
Example #18
# pd.datetime is an alias for datetime.datetime
today = pd.datetime.today()

from airflow import models
from airflow.contrib.kubernetes.secret import Secret
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator

default_dag_args = {
    # Setting start date as yesterday starts the DAG immediately when it is
    # detected in the Cloud Storage bucket.
    'start_date': datetime.datetime(2019, 5, 31),
}
DOCKER_IMAGE = 'gcr.io/dta-ga-bigquery/galileo'
DATA_DIR = '/home/airflow/gcs/data/'
GCS_BUCKET = 'us-east1-dta-airflow-b3415db4-bucket'
cf_username = Secret('env', 'CF_USERNAME', 'airflow-secrets', 'CF_USERNAME')
cf_password = Secret('env', 'CF_PASSWORD', 'airflow-secrets', 'CF_PASSWORD')
htpasswd = models.Variable.get('HTPASSWD', '')

with models.DAG('observatory',
                schedule_interval=datetime.timedelta(days=7),
                default_args=default_dag_args) as dag:
    deploy_shiny = KubernetesPodOperator(
        task_id='deploy-shiny',
        name='deploy-shiny',
        namespace='default',
        secrets=[cf_username, cf_password],
        image_pull_policy="Always",
        image=DOCKER_IMAGE,
        cmds=['bash', '-c'],
        arguments=[
Example #19
    # Continue to run this DAG once per day
    'schedule_interval': datetime.timedelta(days=1),
    # Setting start date as yesterday starts the DAG immediately after discovery
    'start_date': datetime.datetime.now() - datetime.timedelta(days=1)
}

# A Secret is an object that contains a small amount of sensitive data
# such as a password, a token, or a key. Such information might otherwise be
# put in a Pod specification or in an image; putting it in a Secret object
# allows for more control over how it is used, and reduces the risk of
# accidental exposure.
secret_file = Secret(
    # Mounts the secret as a file in RAM-backed tmpfs
    deploy_type='volume',
    # Path where the secret file is mounted, since deploy_type is 'volume'
    deploy_target='/etc/sql_conn',
    # Name of secret in Kubernetes
    secret='airflow-secrets',
    # Key of the secret within Kubernetes
    key='sql_alchemy_conn')

secret_env = Secret(
    # Exposes secret as environment variable
    deploy_type='env',
    # The environment variable the secret is specified under
    deploy_target='SQL_CONN',
    # Name of secret in Kubernetes
    secret='airflow-secrets',
    # Key of the secret within Kubernetes
    key='sql_alchemy_conn')
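
A hypothetical task (not part of the excerpt) attaching both secrets: SQL_CONN arrives as an environment variable and the same value is also mounted under /etc/sql_conn; the image and task names are placeholders.

from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator

kubernetes_secret_vars = KubernetesPodOperator(
    task_id='ex-kube-secrets',
    name='ex-kube-secrets',
    namespace='default',
    image='ubuntu',
    cmds=['bash', '-c', 'echo "$SQL_CONN" && ls /etc/sql_conn'],
    secrets=[secret_env, secret_file],
)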
Example #20
from airflow.contrib.kubernetes.secret import Secret


# GCP service account for dbt operations with BigQuery
# TODO: make this a volume deploy type?
DBT_SERVICE_ACCOUNT = Secret(
    # Expose the secret as environment variable.
    deploy_type="env",
    # The name of the environment variable, since deploy_type is `env` rather
    # than `volume`.
    deploy_target="SERVICE_ACCOUNT",
    # Name of the Kubernetes Secret
    secret="dbt-secret",
    # Key of a secret stored in this Secret object
    key="account.json",
)

GIT_SECRET_ID_RSA_PRIVATE = Secret(
    deploy_type="volume",
    deploy_target="/dbt/.ssh/",
    secret="ssh-key-secret",
    key="id_rsa",
)
Example #21
 def setUp(self):
     self.simple_pod_request_factory = SimplePodRequestFactory()
     self.xcom_pod_request_factory = ExtractXcomPodRequestFactory()
     self.pod = Pod(
         image='busybox',
         envs={
             'ENVIRONMENT': 'prod',
             'LOG_LEVEL': 'warning'
         },
         name='myapp-pod',
         cmds=['sh', '-c', 'echo Hello Kubernetes!'],
         labels={'app': 'myapp'},
         image_pull_secrets='pull_secret_a,pull_secret_b',
         configmaps=['configmap_a', 'configmap_b'],
         secrets=[
             # This should be a secretRef
             Secret('env', None, 'secret_a'),
             # This should be a single secret mounted in volumeMounts
             Secret('volume', '/etc/foo', 'secret_b'),
             # This should produce a single secret mounted in env
             Secret('env', 'TARGET', 'secret_b', 'source_b'),
         ],
         security_context={
             'runAsUser': 1000,
             'fsGroup': 2000,
         })
     self.maxDiff = None
     self.expected = {
         'apiVersion': 'v1',
         'kind': 'Pod',
         'metadata': {
             'name': 'myapp-pod',
             'labels': {
                 'app': 'myapp'
             },
             'annotations': {}
         },
         'spec': {
             'containers': [{
                 'name':
                 'base',
                 'image':
                 'busybox',
                 'command': ['sh', '-c', 'echo Hello Kubernetes!'],
                 'imagePullPolicy':
                 'IfNotPresent',
                 'args': [],
                 'env': [{
                     'name': 'ENVIRONMENT',
                     'value': 'prod'
                 }, {
                     'name': 'LOG_LEVEL',
                     'value': 'warning'
                 }, {
                     'name': 'TARGET',
                     'valueFrom': {
                         'secretKeyRef': {
                             'name': 'secret_b',
                             'key': 'source_b'
                         }
                     }
                 }],
                 'envFrom': [{
                     'secretRef': {
                         'name': 'secret_a'
                     }
                 }, {
                     'configMapRef': {
                         'name': 'configmap_a'
                     }
                 }, {
                     'configMapRef': {
                         'name': 'configmap_b'
                     }
                 }],
                 'volumeMounts': [{
                     'mountPath': '/etc/foo',
                     'name': 'secretvol0',
                     'readOnly': True
                 }]
             }],
             'restartPolicy':
             'Never',
             'nodeSelector': {},
             'volumes': [{
                 'name': 'secretvol0',
                 'secret': {
                     'secretName': 'secret_b'
                 }
             }],
             'imagePullSecrets': [{
                 'name': 'pull_secret_a'
             }, {
                 'name': 'pull_secret_b'
             }],
             'affinity': {},
             'securityContext': {
                 'runAsUser': 1000,
                 'fsGroup': 2000,
             },
         }
     }
Example #22
 def test_secret_throws(self):
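      # a key is supplied while deploy_target is None, which the Secret constructor rejects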
     with self.assertRaises(AirflowConfigException):
         Secret('volume', None, 'secret_a', 'key')
##
# Secret Configuration
##

"""
Secrets pull secret variables and their contents from Kubernetes. You do this to protect things like database credentials. You can do this with files, tokens, or variables.
Arguments:
deploy_type (string): How you want to deploy this secret inside the container
deploy_target (string): The name of the environmental variable in this case
secret (string): The name of the secret stored in Kubernetes
key (string): The key of the secret stored in the object
"""
secret_env = Secret(
    deploy_type='env',
    deploy_target='SQL_CONN',
    secret='airflow-secrets',
    key='sql_alchemy_conn')

##
# Example DAG
##
with DAG(
    dag_id='kubernetes_pod_operator_blobfuse',
    default_args=args,
    schedule_interval=None,
    tags=['example'],
) as dag:

    
    """
Example #24
from airflow.contrib.kubernetes.secret import Secret

# AWS
AWS_ACCOUNT_ID = Secret("env", "AWS_ACCOUNT_ID", "airflow", "AWS_ACCOUNT_ID")

# Snowflake Load
SNOWFLAKE_LOAD_DATABASE = Secret("env", "SNOWFLAKE_LOAD_DATABASE", "airflow",
                                 "SNOWFLAKE_LOAD_DATABASE")
SNOWFLAKE_LOAD_ROLE = Secret("env", "SNOWFLAKE_LOAD_ROLE", "airflow",
                             "SNOWFLAKE_LOAD_ROLE")
SNOWFLAKE_LOAD_PASSWORD = Secret("env", "SNOWFLAKE_LOAD_PASSWORD", "airflow",
                                 "SNOWFLAKE_LOAD_PASSWORD")
SNOWFLAKE_LOAD_USER = Secret("env", "SNOWFLAKE_LOAD_USER", "airflow",
                             "SNOWFLAKE_LOAD_USER")
SNOWFLAKE_LOAD_WAREHOUSE = Secret("env", "SNOWFLAKE_LOAD_WAREHOUSE", "airflow",
                                  "SNOWFLAKE_LOAD_WAREHOUSE")

# Snowflake Transform
SNOWFLAKE_TRANSFORM_ROLE = Secret("env", "SNOWFLAKE_TRANSFORM_ROLE", "airflow",
                                  "SNOWFLAKE_TRANSFORM_ROLE")
SNOWFLAKE_TRANSFORM_SCHEMA = Secret("env", "SNOWFLAKE_TRANSFORM_SCHEMA",
                                    "airflow", "SNOWFLAKE_TRANSFORM_SCHEMA")
SNOWFLAKE_TRANSFORM_USER = Secret("env", "SNOWFLAKE_TRANSFORM_USER", "airflow",
                                  "SNOWFLAKE_TRANSFORM_USER")
SNOWFLAKE_TRANSFORM_WAREHOUSE = Secret("env", "SNOWFLAKE_TRANSFORM_WAREHOUSE",
                                       "airflow",
                                       "SNOWFLAKE_TRANSFORM_WAREHOUSE")
SNOWFLAKE_USER = Secret("env", "SNOWFLAKE_USER", "airflow", "SNOWFLAKE_USER")
SNOWFLAKE_ACCOUNT = Secret("env", "SNOWFLAKE_ACCOUNT", "airflow",
                           "SNOWFLAKE_ACCOUNT")
SNOWFLAKE_PASSWORD = Secret("env", "SNOWFLAKE_PASSWORD", "airflow",
from airflow.contrib.kubernetes.secret import Secret

# AWS
AWS_ACCOUNT_ID = Secret("env", "AWS_ACCOUNT_ID", "airflow", "AWS_ACCOUNT_ID")
AWS_ACCESS_KEY_ID = Secret("env", "AWS_ACCESS_KEY_ID", "airflow",
                           "AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = Secret("env", "AWS_SECRET_ACCESS_KEY", "airflow",
                               "AWS_SECRET_ACCESS_KEY")

# blapi
BLAPI_DATABASE_URL = Secret("env", "BLAPI_DATABASE_URL", "airflow",
                            "BLAPI_DATABASE_URL")
BLAPI_TOKEN = Secret("env", "BLAPI_TOKEN", "airflow", "BLAPI_TOKEN")
BLAPI_URL = Secret("env", "BLAPI_URL", "airflow", "BLAPI_URL")

BLAPI_TEST_URL = Secret("env", "BLAPI_TEST_URL", "airflow", "BLAPI_TEST_URL")
BLAPI_TEST_TOKEN = Secret("env", "BLAPI_TEST_TOKEN", "airflow",
                          "BLAPI_TEST_TOKEN")
BLAPI_TEST_DATABASE_URL = Secret("env", "BLAPI_TEST_DATABASE_URL", "airflow",
                                 "BLAPI_TEST_DATABASE_URL")

# dbt Cloud
DBT_CLOUD_API_KEY = Secret("env", "DBT_CLOUD_API_KEY", "airflow",
                           "DBT_CLOUD_API_KEY")
DBT_CLOUD_API_ACCOUNT_ID = Secret("env", "DBT_CLOUD_API_ACCOUNT_ID", "airflow",
                                  "DBT_CLOUD_API_ACCOUNT_ID")

# Snowflake Load
SNOWFLAKE_LOAD_DATABASE = Secret("env", "SNOWFLAKE_LOAD_DATABASE", "airflow",
                                 "SNOWFLAKE_LOAD_DATABASE")
SNOWFLAKE_LOAD_ROLE = Secret("env", "SNOWFLAKE_LOAD_ROLE", "airflow",
Example #26
          default_args=default_args,
          schedule_interval=dag_interval,
          concurrency=1)

# env variables for inside the k8s pod
k8s_run_env = {
    'SECRETS_PATH': '/secrets/wcgh-secrets.json',
    'COVID_19_DEPLOY_FILE': 'covid-19-widgets.zip',
    'COVID_19_DEPLOY_URL': 'https://ds2.capetown.gov.za/covid-19-widgets-deploy',
    'COVID_19_WIDGETS_DIR': '/covid-19-widgets',
    'DB_UTILS_LOCATION': 'https://ds2.capetown.gov.za/db-utils',
    'DB_UTILS_PKG': 'db_utils-0.3.7-py2.py3-none-any.whl'
}

# airflow-workers' secrets
secret_file = Secret('volume', '/secrets', 'wcgh-secret')

# arguments for the k8s operator
k8s_run_args = {
    "image": "cityofcapetown/datascience:python@sha256:c5a8ec97e35e603aca281343111193a26a929d821b84c6678fb381f9e7bd08d7",
    "namespace": 'airflow-workers',
    "is_delete_operator_pod": True,
    "get_logs": True,
    "in_cluster": True,
    "secrets": [secret_file],
    "env_vars": k8s_run_env,
    "image_pull_policy": "IfNotPresent",
    "startup_timeout_seconds": 60 * 30,
}

          schedule_interval=dag_interval,
          concurrency=5)

# env variables for inside the k8s pod
k8s_run_env = {
    'SECRETS_PATH': '/secrets/secrets.json',
    'COVID_19_DEPLOY_FILE': 'covid-19-widgets.zip',
    'COVID_19_DEPLOY_URL': 'https://ds2.capetown.gov.za/covid-19-widgets-deploy',
    'COVID_19_WIDGETS_DIR': '/covid-19-widgets',
    'DB_UTILS_LOCATION': 'https://ds2.capetown.gov.za/db-utils',
    'DB_UTILS_PKG': 'db_utils-0.4.0-py2.py3-none-any.whl'
}

# airflow-workers' secrets
secret_file = Secret('volume', '/secrets', 'airflow-workers-secret')

# arguments for the k8s operator
k8s_run_args = {
    "image":
    "cityofcapetown/datascience:python@sha256:53a435a5550f057b89b5d750d19efd1c6ac740a0db340698d928929d4335a64f",
    "namespace": 'airflow-workers',
    "is_delete_operator_pod": True,
    "get_logs": True,
    "in_cluster": True,
    "secrets": [secret_file],
    "env_vars": k8s_run_env,
    "image_pull_policy": "IfNotPresent",
    "startup_timeout_seconds": 60 * 30,
}
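
A hypothetical task (not shown in the excerpt) illustrating how the shared k8s_run_args dict is typically splatted into each operator; the task name and command are placeholders.

from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator

widgets_deploy = KubernetesPodOperator(
    **k8s_run_args,
    task_id="covid-19-widgets-deploy",
    name="covid-19-widgets-deploy",
    cmds=["bash", "-c", "echo deploy"],
)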
from airflow.contrib.kubernetes.secret import Secret

# AWS
AWS_ACCOUNT_ID = Secret("env", "AWS_ACCOUNT_ID", "airflow", "AWS_ACCOUNT_ID")
AWS_ACCESS_KEY_ID = Secret("env", "AWS_ACCESS_KEY_ID", "airflow",
                           "AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = Secret("env", "AWS_SECRET_ACCESS_KEY", "airflow",
                               "AWS_SECRET_ACCESS_KEY")

# Snowflake Load
SNOWFLAKE_LOAD_DATABASE = Secret("env", "SNOWFLAKE_LOAD_DATABASE", "airflow",
                                 "SNOWFLAKE_LOAD_DATABASE")
SNOWFLAKE_LOAD_ROLE = Secret("env", "SNOWFLAKE_LOAD_ROLE", "airflow",
                             "SNOWFLAKE_LOAD_ROLE")
SNOWFLAKE_LOAD_PASSWORD = Secret("env", "SNOWFLAKE_LOAD_PASSWORD", "airflow",
                                 "SNOWFLAKE_LOAD_PASSWORD")
SNOWFLAKE_LOAD_USER = Secret("env", "SNOWFLAKE_LOAD_USER", "airflow",
                             "SNOWFLAKE_LOAD_USER")
SNOWFLAKE_LOAD_WAREHOUSE = Secret("env", "SNOWFLAKE_LOAD_WAREHOUSE", "airflow",
                                  "SNOWFLAKE_LOAD_WAREHOUSE")

# Snowflake Transform
SNOWFLAKE_TRANSFORM_ROLE = Secret("env", "SNOWFLAKE_TRANSFORM_ROLE", "airflow",
                                  "SNOWFLAKE_TRANSFORM_ROLE")
SNOWFLAKE_TRANSFORM_SCHEMA = Secret("env", "SNOWFLAKE_TRANSFORM_SCHEMA",
                                    "airflow", "SNOWFLAKE_TRANSFORM_SCHEMA")
SNOWFLAKE_TRANSFORM_USER = Secret("env", "SNOWFLAKE_TRANSFORM_USER", "airflow",
                                  "SNOWFLAKE_TRANSFORM_USER")
SNOWFLAKE_TRANSFORM_WAREHOUSE = Secret("env", "SNOWFLAKE_TRANSFORM_WAREHOUSE",
                                       "airflow",
                                       "SNOWFLAKE_TRANSFORM_WAREHOUSE")
Example #29
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
from airflow.contrib.kubernetes.secret import Secret
from airflow.contrib.kubernetes.volume import Volume
from airflow.contrib.kubernetes.volume_mount import VolumeMount

secret_file = Secret('volume', '/etc/sql_conn', 'airflow-secrets', 'sql_alchemy_conn')
secret_env  = Secret('env', 'SQL_CONN', 'airflow-secrets', 'sql_alchemy_conn')
volume_mount = VolumeMount('test-volume',
                            mount_path='/root/mount_file',
                            sub_path=None,
                            read_only=True)

volume_config= {
    'persistentVolumeClaim':
      {
        'claimName': 'test-volume'
      }
    }
volume = Volume(name='test-volume', configs=volume_config)

affinity = {
    'nodeAffinity': {
      'preferredDuringSchedulingIgnoredDuringExecution': [
        {
          "weight": 1,
          "preference": {
            "matchExpressions": {
              "key": "disktype",
              "operator": "In",
              "values": ["ssd"]
            }
          }
Example #30
from datetime import datetime, timedelta
from airflow import DAG
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
from airflow.contrib.kubernetes.secret import Secret

secrets = [
	Secret(
		deploy_type="volume",
		deploy_target="/etc/secrets",
		secret="common-svc-acc",
		key="sa-key.json")
]

default_args = {
    "owner": "eduard.chai",
    "start_date": datetime(2020, 3, 1),
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 1,
    "retry_delay": timedelta(minutes=3)
}

dag = DAG(
    "gcs_loader",
    default_args=default_args,
    schedule_interval="0 1 * * *",
    catchup=False,
    max_active_runs=1
)
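
A hypothetical downstream task (not part of the excerpt) consuming the mounted service-account key; the image and command are placeholders.

load_task = KubernetesPodOperator(
    task_id="gcs_loader_task",
    name="gcs-loader-task",
    namespace="default",
    image="google/cloud-sdk:slim",
    cmds=["bash", "-c"],
    arguments=["gcloud auth activate-service-account --key-file=/etc/secrets/sa-key.json"],
    secrets=secrets,
    dag=dag,
)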