# BUG FIX: `datetime` was used below (YESTERDAY) but never imported.
import datetime

from airflow import models
from airflow.contrib.kubernetes import secret
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator

# A Secret is an object that contains a small amount of sensitive data such as
# a password, a token, or a key. Such information might otherwise be put in a
# Pod specification or in an image; putting it in a Secret object allows for
# more control over how it is used, and reduces the risk of accidental
# exposure.

# [START composer_kubernetespodoperator_secretobject]
secret_env = secret.Secret(
    # Expose the secret as environment variable.
    deploy_type='env',
    # The name of the environment variable, since deploy_type is `env` rather
    # than `volume`.
    deploy_target='SQL_CONN',
    # Name of the Kubernetes Secret
    secret='airflow-secrets',
    # Key of a secret stored in this Secret object
    key='sql_alchemy_conn')
secret_volume = secret.Secret(
    'volume',
    # Path where we mount the secret as volume
    '/var/secrets/google',
    # Name of Kubernetes Secret
    'service-account',
    # Key in the form of service account file name
    'service-account.json')
# [END composer_kubernetespodoperator_secretobject]

# Start date for the DAG: one day in the past so runs trigger immediately.
YESTERDAY = datetime.datetime.now() - datetime.timedelta(days=1)
def build_kubernetes_pod_operator(operator_ref, dag_ref):
    """
    Builds a DAG operator of type: KubernetesPodOperator.

    Args:
        operator_ref (dict): the definition of the operator
        dag_ref (DAG): the reference to the dag to associate this operator

    Returns:
        kubernetes_pod_operator.KubernetesPodOperator: the configured
            operator, attached to *dag_ref*.

    Raises:
        ValueError: if the operator lists secret references but the
            module-level `payload` has no 'kubernetes_secrets' section.
    """
    op = kubernetes_pod_operator.KubernetesPodOperator(
        task_id=operator_ref['task_id'],
        name=operator_ref['name'],
        image=operator_ref['image'],
        # Fall back to the 'default' namespace when none is configured.
        namespace=operator_ref.get('namespace', 'default'),
        dag=dag_ref)

    # Populate non-default operator values: each key present in the
    # definition is copied verbatim onto the operator attribute of the same
    # name. BUG FIX: the original if-chain assigned 'labels' to op.env_vars
    # and 'volume_mounts' to op.volumes (copy/paste errors); the data-driven
    # loop makes that class of mistake impossible.
    for attr in ('cmds', 'arguments', 'env_vars', 'labels',
                 'startup_timeout_seconds', 'ports', 'params',
                 'node_selectors', 'resources', 'config_file', 'annotations',
                 'volumes', 'volume_mounts', 'affinity', 'configmaps',
                 'image_pull_policy'):
        if attr in operator_ref:
            setattr(op, attr, operator_ref[attr])

    # Kubernetes Secrets exposed to the pod (as env vars or volume mounts).
    op.secrets = _build_secret_entries(operator_ref, 'pod_secret_refs')
    # Secrets used to pull the container image from a private registry.
    op.image_pull_secrets = _build_secret_entries(
        operator_ref, 'image_pull_secret_refs')

    # BUG FIX: the original returned the undefined name `operator`.
    return op


def _build_secret_entries(operator_ref, ref_key):
    """
    Resolves the secret names listed under *ref_key* in *operator_ref* into
    `secret.Secret` objects via the 'kubernetes_secrets' section of the
    module-level `payload`.

    Args:
        operator_ref (dict): the definition of the operator
        ref_key (string): 'pod_secret_refs' or 'image_pull_secret_refs'

    Returns:
        list: resolved `secret.Secret` entries; empty when *ref_key* is
            absent or lists nothing (matching the original behavior of
            assigning an empty list in that case).

    Raises:
        ValueError: if *ref_key* lists secrets but `payload` has no
            'kubernetes_secrets' section.
    """
    entries = []
    for secret_name in operator_ref.get(ref_key, []):
        # NOTE(review): `payload` and `find_key_in_dict` are module-level
        # names defined outside this view; the check is kept inside the loop
        # so an empty reference list never raises, as in the original.
        if not list(find_key_in_dict('kubernetes_secrets', payload)):
            raise ValueError(
                f"Pod {operator_ref['name']} declares '{ref_key}' but "
                f"'kubernetes_secrets' has not been defined.")
        secret_entry_ref = payload['kubernetes_secrets'][secret_name]
        entries.append(secret.Secret(
            # Deploy type: 'env' for environment variable or 'volume'
            deploy_type=secret_entry_ref['deploy_type'],
            # The name of the environment variable or the path of the volume
            deploy_target=secret_entry_ref['deploy_target'],
            # Name of the Kubernetes Secret
            secret=secret_entry_ref['secret'],
            # Key of a secret stored in this Secret object, or the key in the
            # form of a service account file name for volume secrets
            key=secret_entry_ref['key']))
    return entries
from airflow.contrib.kubernetes import pod from airflow.contrib.kubernetes import secret from airflow.contrib.operators import kubernetes_pod_operator # A Secret is an object that contains a small amount of sensitive data such as # a password, a token, or a key. Such information might otherwise be put in a # Pod specification or in an image; putting it in a Secret object allows for # more control over how it is used, and reduces the risk of accidental # exposure. # [START composer_kubernetespodoperator_secretobject] secret_env = secret.Secret( # Expose the secret as environment variable. deploy_type='env', # The name of the environment variable, since deploy_type is `env` rather # than `volume`. deploy_target='SQL_CONN', # Name of the Kubernetes Secret secret='airflow-secrets', # Key of a secret stored in this Secret object key='sql_alchemy_conn') # [END composer_kubernetespodoperator_secretobject] YESTERDAY = datetime.datetime.now() - datetime.timedelta(days=1) # If a Pod fails to launch, or has an error occur in the container, Airflow # will show the task as failed, as well as contain all of the task logs # required to debug. with models.DAG(dag_id='composer_sample_kubernetes_pod', schedule_interval=datetime.timedelta(days=1), start_date=YESTERDAY) as dag: # Only name, namespace, image, and task_id are required to create a
from airflow.contrib.kubernetes import secret from airflow.contrib.operators import kubernetes_pod_operator from airflow.models import Variable composer_namespace = Variable.get("composer_namespace") bucket_name = Variable.get("bucket") env_name = Variable.get("env") default_args = {"email": ["*****@*****.**"]} YESTERDAY = datetime.datetime.now() - datetime.timedelta(days=1) # [START composer_kubernetespodoperator_secretobject] # First define a secret from a file secret_file = secret.Secret( deploy_type="volume", deploy_target="/tmp/secrets/google", secret="gc-storage-rw-key", key="key.json", ) # [END composer_kubernetespodoperator_secretobject] # If a Pod fails to launch, or has an error occur in the container, Airflow # will show the task as failed, as well as contain all of the task logs # required to debug. with models.DAG( dag_id="data_prep_cataloguer_pipeline", schedule_interval=datetime.timedelta(days=1), default_args=default_args, start_date=YESTERDAY, ) as dag: kubernetes_list_bucket_pod = kubernetes_pod_operator.KubernetesPodOperator( task_id="data-normalizer",
# BUG FIX: `secret` is used below but was never imported in this file's
# import block; imported here outside the [START]/[END] doc-snippet markers
# so the published snippet is unchanged. Matches the import style used by
# the sibling sample files.
from airflow.contrib.kubernetes import secret

# [START composer_kubernetespodoperator]
from airflow.contrib.operators import kubernetes_pod_operator
# [END composer_kubernetespodoperator]

# A Secret is an object that contains a small amount of sensitive data such as
# a password, a token, or a key. Such information might otherwise be put in a
# Pod specification or in an image; putting it in a Secret object allows for
# more control over how it is used, and reduces the risk of accidental
# exposure.
secret_file = secret.Secret(
    # Mounts the secret as a file in RAM-backed tmpfs.
    deploy_type='volume',
    # File path of where to deploy the target, since deploy_type is 'volume'
    # rather than 'env'.
    deploy_target='/etc/sql_conn',
    # Name of secret in Kubernetes, if the secret is not already defined in
    # Kubernetes using kubectl the Pod will fail to find the secret, and in
    # turn, fail to launch.
    secret='airflow-secrets',
    # Key of the secret within Kubernetes.
    key='sql_alchemy_conn')
secret_env = secret.Secret(
    # Expose the secret as environment variable.
    deploy_type='env',
    # The name of the environment variable, since deploy_type is `env` rather
    # than `volume`.
    deploy_target='SQL_CONN',
    secret='airflow-secrets',
    key='sql_alchemy_conn')
# We definitely want the logs. You can also look at logs in the kubernetes # pod 'get_logs': True, # A new pod is spun up for each task you run. Setting this to 'True' will # delete the pod after it runs. Regardless of whether the task ran # successfully or not. Set to 'False' when you're trying to debug. But # remember to delete your pods!! 'is_delete_operator_pod': True } # Docs: https://cloud.google.com/composer/docs/how-to/using/using-kubernetes-pod-operator#secret-config secret_env_test = secret.Secret( # Expose the secret as environment variable. deploy_type='env', # Specify the name of the environment variable for the pod deploy_target='TEST_ENV', # Name of the Kubernetes Secret from which to pull the environment variable secret='test-env', # Key of the secret stored in the Kubernetes Secret object key='TEST') # secret_storage = secret.Secret( # # Expose the secret as volume. # deploy_type='volume', # # Specify the path to the folder (not the full filepath) to store the secret # deploy_target='/etc/storage-credentials', # # Name of the Kubernetes Secret from which to pull the volume # secret='storage-admin', # # Name of the secret stored in the Kubernetes Secret object # # Incidentally, this will be become the filename of the secret. # # I.e. the full filepath will be /etc/storage-credentials/gcs-admin-key.json
from airflow import DAG from datetime import datetime, timedelta from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator from airflow.operators.dummy_operator import DummyOperator from airflow.contrib.kubernetes import secret import logging import os import sys import traceback try: env_var_secret = secret.Secret( deploy_type='env', deploy_target='VERSION_NUMBER', secret='azure-registry', key='VERSION_NUMBER', ) default_args = { 'owner': 'airflow', 'depends_on_past': False, 'start_date': datetime.utcnow(), 'email': ['*****@*****.**'], 'email_on_failure': False, 'email_on_retry': False, 'retries': 1, 'retry_delay': timedelta(minutes=5) } dag = DAG( 'jordi_test_get_secert1', default_args=default_args, schedule_interval=timedelta(minutes=10))