def setUp(self):
    """Build a fresh GKEPodOperator fixture before each test."""
    self.gke_op = GKEPodOperator(
        project_id=TEST_GCP_PROJECT_ID,
        location=PROJECT_LOCATION,
        cluster_name=CLUSTER_NAME,
        task_id=PROJECT_TASK_ID,
        name=TASK_NAME,
        namespace=NAMESPACE,
        image=IMAGE,
    )
def PodOperator(*args, **kwargs):
    """Return a pod-launching operator suited to the current environment.

    In development, or when ``is_gke=True`` is passed, a ``GKEPodOperator``
    targeting the hard-coded production project is returned (there currently
    isn't a staging cluster); otherwise a plain ``KubernetesPodOperator``.

    Recognized kwargs (all popped before being forwarded):
        namespace: pod namespace, default "default".
        is_gke: force the GKE path, default False (always popped).
        pod_location / cluster_name: GKE overrides; fall back to the
            POD_LOCATION / POD_CLUSTER_NAME environment variables.
        secrets: iterable of dicts, each converted to a ``Secret``.
    """
    # TODO: tune this, and add resource limits
    namespace = kwargs.pop("namespace", "default")
    is_gke = kwargs.pop("is_gke", False)  # we want to always pop()

    if "secrets" in kwargs:
        # Materialize as a list: in Python 3 ``map`` returns a lazy,
        # single-use iterator, so anything that reads the secrets twice
        # (or serializes the operator) would see them empty.
        kwargs["secrets"] = [Secret(**d) for d in kwargs["secrets"]]

    if is_development() or is_gke:
        return GKEPodOperator(
            *args,
            in_cluster=False,
            project_id="cal-itp-data-infra",  # there currently isn't a staging cluster
            location=kwargs.pop("pod_location", os.environ["POD_LOCATION"]),
            cluster_name=kwargs.pop("cluster_name", os.environ["POD_CLUSTER_NAME"]),
            namespace=namespace,
            image_pull_policy="Always" if is_development() else "IfNotPresent",
            **kwargs,
        )
    return KubernetesPodOperator(*args, namespace=namespace, **kwargs)
def setUp(self):
    """Create the GKEPodOperator under test before each test case."""
    self.gke_op = GKEPodOperator(
        project_id=PROJECT_ID,
        location=PROJECT_LOCATION,
        cluster_name=CLUSTER_NAME,
        task_id=PROJECT_TASK_ID,
        name=TASK_NAME,
        namespace=NAMESPACE,
        image=IMAGE,
    )
def setUp(self):
    """Create the operator fixture and clear any leftover credentials var."""
    self.gke_op = GKEPodOperator(
        project_id=TEST_GCP_PROJECT_ID,
        location=PROJECT_LOCATION,
        cluster_name=CLUSTER_NAME,
        task_id=PROJECT_TASK_ID,
        name=TASK_NAME,
        namespace=NAMESPACE,
        image=IMAGE,
    )
    # Start each test without GOOGLE_APPLICATION_CREDENTIALS-style state
    # left over from a previous test.
    os.environ.pop(GAC_ENV_VAR, None)
def get_ds_step_pod(step_name):
    """Build the GKEPodOperator for one step of the titanic pipeline.

    The step's image is looked up via ``get_image`` and the bucket is
    passed through as the container's arguments.
    """
    pod_arguments = [my_bucket]
    return GKEPodOperator(
        project_id='ked-airflow-01-2021',
        startup_timeout_seconds=600,
        location=my_location,
        cluster_name=my_cluster_name,
        namespace='default',
        task_id=f"{step_name}_titanic",
        image=get_image(step_name),
        # Pod names may not contain underscores.
        name=f"{step_name.replace('_', '-')}-titanic",
        task_concurrency=1,
        image_pull_policy='IfNotPresent',
        is_delete_operator_pod=True,
        hostnetwork=False,
        arguments=pod_arguments,
    )
def pod_operator(*args, **kwargs):
    """Return a pod operator: GKE-backed in development, in-cluster otherwise."""
    # TODO: tune this, and add resource limits
    if not is_development():
        return KubernetesPodOperator(*args, namespace="default", **kwargs)
    return GKEPodOperator(
        *args,
        in_cluster=False,
        project_id=os.environ["GOOGLE_CLOUD_PROJECT"],
        location=os.environ["POD_LOCATION"],
        cluster_name=os.environ["POD_CLUSTER_NAME"],
        namespace="default",
        **kwargs,
    )
class GKEPodOperatorTest(unittest.TestCase):
    """Tests for GKEPodOperator: template fields, kubeconfig/credential
    setup performed by execute(), and the _set_env_from_extras /
    _get_field helpers."""

    def setUp(self):
        self.gke_op = GKEPodOperator(project_id=TEST_GCP_PROJECT_ID,
                                     location=PROJECT_LOCATION,
                                     cluster_name=CLUSTER_NAME,
                                     task_id=PROJECT_TASK_ID,
                                     name=TASK_NAME,
                                     namespace=NAMESPACE,
                                     image=IMAGE)
        # Tests assert on GAC_ENV_VAR; make sure it is unset to begin with.
        if GAC_ENV_VAR in os.environ:
            del os.environ[GAC_ENV_VAR]

    def test_template_fields(self):
        # Every template field of the base operator must stay templated
        # on the GKE subclass.
        self.assertTrue(
            set(KubernetesPodOperator.template_fields).issubset(
                GKEPodOperator.template_fields))

    # pylint:disable=unused-argument
    @mock.patch(
        'airflow.contrib.operators.kubernetes_pod_operator.KubernetesPodOperator.execute'
    )
    @mock.patch('tempfile.NamedTemporaryFile')
    @mock.patch("subprocess.check_call")
    def test_execute_conn_id_none(self, proc_mock, file_mock, exec_mock):
        """With no connection id, execute() still writes a kubeconfig and
        shells out to gcloud for cluster credentials."""
        self.gke_op.gcp_conn_id = None
        file_mock.return_value.__enter__.return_value.name = FILE_NAME

        self.gke_op.execute(None)

        # Assert Environment Variable is being set correctly
        self.assertIn(KUBE_ENV_VAR, os.environ)
        self.assertEqual(os.environ[KUBE_ENV_VAR], FILE_NAME)

        # Assert the gcloud command being called correctly
        proc_mock.assert_called_with(
            GCLOUD_COMMAND.format(CLUSTER_NAME, PROJECT_LOCATION,
                                  TEST_GCP_PROJECT_ID).split())
        self.assertEqual(self.gke_op.config_file, FILE_NAME)

    # pylint:disable=unused-argument
    @mock.patch('airflow.hooks.base_hook.BaseHook.get_connection')
    @mock.patch(
        'airflow.contrib.operators.kubernetes_pod_operator.KubernetesPodOperator.execute'
    )
    @mock.patch('tempfile.NamedTemporaryFile')
    @mock.patch("subprocess.check_call")
    @mock.patch.dict(os.environ, {})
    def test_execute_conn_id_path(self, proc_mock, file_mock, exec_mock,
                                  get_con_mock):
        """A connection with a key_path extra exports that path directly."""
        # gcp_conn_id is defaulted to `google_cloud_default`
        file_path = '/path/to/file'
        # Fixed typo: local was misspelled `kaeyfile_dict`.
        keyfile_dict = {"extra__google_cloud_platform__key_path": file_path}
        get_con_mock.return_value.extra_dejson = keyfile_dict
        file_mock.return_value.__enter__.return_value.name = FILE_NAME

        self.gke_op.execute(None)

        # Assert Environment Variable is being set correctly
        self.assertIn(KUBE_ENV_VAR, os.environ)
        self.assertEqual(os.environ[KUBE_ENV_VAR], FILE_NAME)
        self.assertIn(GAC_ENV_VAR, os.environ)
        # since we passed in keyfile_path we should get a file
        self.assertEqual(os.environ[GAC_ENV_VAR], file_path)

        # Assert the gcloud command being called correctly
        proc_mock.assert_called_with(
            GCLOUD_COMMAND.format(CLUSTER_NAME, PROJECT_LOCATION,
                                  TEST_GCP_PROJECT_ID).split())
        self.assertEqual(self.gke_op.config_file, FILE_NAME)

    # pylint:disable=unused-argument
    @mock.patch.dict(os.environ, {})
    @mock.patch('airflow.hooks.base_hook.BaseHook.get_connection')
    @mock.patch(
        'airflow.contrib.operators.kubernetes_pod_operator.KubernetesPodOperator.execute'
    )
    @mock.patch('tempfile.NamedTemporaryFile')
    @mock.patch("subprocess.check_call")
    def test_execute_conn_id_dict(self, proc_mock, file_mock, exec_mock,
                                  get_con_mock):
        """A keyfile_dict extra is written to a temp file whose path is
        exported as the credentials variable."""
        # gcp_conn_id is defaulted to `google_cloud_default`
        file_path = '/path/to/file'

        # This is used in the _set_env_from_extras method
        file_mock.return_value.name = file_path

        # This is used in the execute method
        file_mock.return_value.__enter__.return_value.name = FILE_NAME

        keyfile_dict = {
            "extra__google_cloud_platform__keyfile_dict":
                '{"private_key": "r4nd0m_k3y"}'
        }
        get_con_mock.return_value.extra_dejson = keyfile_dict

        self.gke_op.execute(None)

        # Assert Environment Variable is being set correctly
        self.assertIn(KUBE_ENV_VAR, os.environ)
        self.assertEqual(os.environ[KUBE_ENV_VAR], FILE_NAME)
        self.assertIn(GAC_ENV_VAR, os.environ)
        # since we passed in keyfile_path we should get a file
        self.assertEqual(os.environ[GAC_ENV_VAR], file_path)

        # Assert the gcloud command being called correctly
        proc_mock.assert_called_with(
            GCLOUD_COMMAND.format(CLUSTER_NAME, PROJECT_LOCATION,
                                  TEST_GCP_PROJECT_ID).split())
        self.assertEqual(self.gke_op.config_file, FILE_NAME)

    @mock.patch.dict(os.environ, {})
    def test_set_env_from_extras_none(self):
        extras = {}
        self.gke_op._set_env_from_extras(extras)
        # _set_env_from_extras should not edit os.environ if extras does not
        # specify
        self.assertNotIn(GAC_ENV_VAR, os.environ)

    @mock.patch.dict(os.environ, {})
    @mock.patch('tempfile.NamedTemporaryFile')
    def test_set_env_from_extras_dict(self, file_mock):
        keyfile_dict_str = '{ \"test\": \"cluster\" }'
        extras = {
            'extra__google_cloud_platform__keyfile_dict': keyfile_dict_str,
        }

        def mock_temp_write(content):
            # Mimic a binary-mode NamedTemporaryFile, which rejects str:
            # forces the operator to encode the keyfile JSON before writing.
            if not isinstance(content, bytes):
                raise TypeError(
                    "a bytes-like object is required, not {}".format(
                        type(content).__name__))

        file_mock.return_value.write = mock_temp_write
        file_mock.return_value.name = FILE_NAME

        key_file = self.gke_op._set_env_from_extras(extras)
        self.assertEqual(os.environ[GAC_ENV_VAR], FILE_NAME)
        self.assertIsInstance(key_file, mock.MagicMock)

    @mock.patch.dict(os.environ, {})
    def test_set_env_from_extras_path(self):
        test_path = '/test/path'
        extras = {
            'extra__google_cloud_platform__key_path': test_path,
        }
        self.gke_op._set_env_from_extras(extras)
        self.assertEqual(os.environ[GAC_ENV_VAR], test_path)

    def test_get_field(self):
        field_name = 'test_field'
        field_value = 'test_field_value'
        extras = {
            'extra__google_cloud_platform__{}'.format(field_name): field_value
        }
        ret_val = self.gke_op._get_field(extras, field_name)
        self.assertEqual(field_value, ret_val)

    @mock.patch(
        'airflow.contrib.operators.gcp_container_operator.GKEPodOperator.log')
    def test_get_field_fail(self, log_mock):
        log_mock.info = mock.Mock()
        log_str = 'Field %s not found in extras.'
        field_name = 'test_field'
        field_value = 'test_field_value'
        extras = {}
        ret_val = self.gke_op._get_field(extras, field_name,
                                         default=field_value)
        # Assert default is returned upon failure
        self.assertEqual(field_value, ret_val)
        log_mock.info.assert_called_with(log_str, field_name)
class GKEPodOperatorTest(unittest.TestCase):
    """Tests for GKEPodOperator: template fields, kubeconfig/credential
    setup performed by execute(), and the _set_env_from_extras /
    _get_field helpers."""

    def setUp(self):
        self.gke_op = GKEPodOperator(project_id=TEST_GCP_PROJECT_ID,
                                     location=PROJECT_LOCATION,
                                     cluster_name=CLUSTER_NAME,
                                     task_id=PROJECT_TASK_ID,
                                     name=TASK_NAME,
                                     namespace=NAMESPACE,
                                     image=IMAGE)
        # Tests assert on GAC_ENV_VAR; make sure it is unset to begin with.
        if GAC_ENV_VAR in os.environ:
            del os.environ[GAC_ENV_VAR]

    def test_template_fields(self):
        # Every template field of the base operator must stay templated
        # on the GKE subclass.
        self.assertTrue(set(KubernetesPodOperator.template_fields).issubset(
            GKEPodOperator.template_fields))

    @mock.patch(
        'airflow.contrib.operators.kubernetes_pod_operator.KubernetesPodOperator.execute')
    @mock.patch('tempfile.NamedTemporaryFile')
    @mock.patch("subprocess.check_call")
    def test_execute_conn_id_none(self, proc_mock, file_mock, exec_mock):
        """With no connection id, execute() still writes a kubeconfig and
        shells out to gcloud for cluster credentials."""
        self.gke_op.gcp_conn_id = None
        file_mock.return_value.__enter__.return_value.name = FILE_NAME

        self.gke_op.execute(None)

        # Assert Environment Variable is being set correctly
        self.assertIn(KUBE_ENV_VAR, os.environ)
        self.assertEqual(os.environ[KUBE_ENV_VAR], FILE_NAME)

        # Assert the gcloud command being called correctly
        proc_mock.assert_called_with(
            GCLOUD_COMMAND.format(CLUSTER_NAME, PROJECT_LOCATION,
                                  TEST_GCP_PROJECT_ID).split())
        self.assertEqual(self.gke_op.config_file, FILE_NAME)

    @mock.patch('airflow.hooks.base_hook.BaseHook.get_connection')
    @mock.patch(
        'airflow.contrib.operators.kubernetes_pod_operator.KubernetesPodOperator.execute')
    @mock.patch('tempfile.NamedTemporaryFile')
    @mock.patch("subprocess.check_call")
    @mock.patch.dict(os.environ, {})
    def test_execute_conn_id_path(self, proc_mock, file_mock, exec_mock,
                                  get_con_mock):
        """A connection with a key_path extra exports that path directly."""
        # gcp_conn_id is defaulted to `google_cloud_default`
        FILE_PATH = '/path/to/file'
        KEYFILE_DICT = {"extra__google_cloud_platform__key_path": FILE_PATH}
        get_con_mock.return_value.extra_dejson = KEYFILE_DICT
        file_mock.return_value.__enter__.return_value.name = FILE_NAME

        self.gke_op.execute(None)

        # Assert Environment Variable is being set correctly
        self.assertIn(KUBE_ENV_VAR, os.environ)
        self.assertEqual(os.environ[KUBE_ENV_VAR], FILE_NAME)
        self.assertIn(GAC_ENV_VAR, os.environ)
        # since we passed in keyfile_path we should get a file
        self.assertEqual(os.environ[GAC_ENV_VAR], FILE_PATH)

        # Assert the gcloud command being called correctly
        proc_mock.assert_called_with(
            GCLOUD_COMMAND.format(CLUSTER_NAME, PROJECT_LOCATION,
                                  TEST_GCP_PROJECT_ID).split())
        self.assertEqual(self.gke_op.config_file, FILE_NAME)

    @mock.patch.dict(os.environ, {})
    @mock.patch('airflow.hooks.base_hook.BaseHook.get_connection')
    @mock.patch(
        'airflow.contrib.operators.kubernetes_pod_operator.KubernetesPodOperator.execute')
    @mock.patch('tempfile.NamedTemporaryFile')
    @mock.patch("subprocess.check_call")
    def test_execute_conn_id_dict(self, proc_mock, file_mock, exec_mock,
                                  get_con_mock):
        """A keyfile_dict extra is written to a temp file whose path is
        exported as the credentials variable."""
        # gcp_conn_id is defaulted to `google_cloud_default`
        FILE_PATH = '/path/to/file'

        # This is used in the _set_env_from_extras method
        file_mock.return_value.name = FILE_PATH

        # This is used in the execute method
        file_mock.return_value.__enter__.return_value.name = FILE_NAME

        KEYFILE_DICT = {"extra__google_cloud_platform__keyfile_dict":
                        '{"private_key": "r4nd0m_k3y"}'}
        get_con_mock.return_value.extra_dejson = KEYFILE_DICT

        self.gke_op.execute(None)

        # Assert Environment Variable is being set correctly
        self.assertIn(KUBE_ENV_VAR, os.environ)
        self.assertEqual(os.environ[KUBE_ENV_VAR], FILE_NAME)
        self.assertIn(GAC_ENV_VAR, os.environ)
        # since we passed in keyfile_path we should get a file
        self.assertEqual(os.environ[GAC_ENV_VAR], FILE_PATH)

        # Assert the gcloud command being called correctly
        proc_mock.assert_called_with(
            GCLOUD_COMMAND.format(CLUSTER_NAME, PROJECT_LOCATION,
                                  TEST_GCP_PROJECT_ID).split())
        self.assertEqual(self.gke_op.config_file, FILE_NAME)

    @mock.patch.dict(os.environ, {})
    def test_set_env_from_extras_none(self):
        extras = {}
        self.gke_op._set_env_from_extras(extras)
        # _set_env_from_extras should not edit os.environ if extras does not
        # specify
        self.assertNotIn(GAC_ENV_VAR, os.environ)

    @mock.patch.dict(os.environ, {})
    @mock.patch('tempfile.NamedTemporaryFile')
    def test_set_env_from_extras_dict(self, file_mock):
        file_mock.return_value.name = FILE_NAME

        KEYFILE_DICT_STR = '{ \"test\": \"cluster\" }'
        extras = {
            'extra__google_cloud_platform__keyfile_dict': KEYFILE_DICT_STR,
        }

        self.gke_op._set_env_from_extras(extras)
        # Fix: assertEquals is a deprecated alias removed in Python 3.12;
        # use assertEqual.
        self.assertEqual(os.environ[GAC_ENV_VAR], FILE_NAME)
        file_mock.return_value.write.assert_called_once_with(KEYFILE_DICT_STR)

    @mock.patch.dict(os.environ, {})
    def test_set_env_from_extras_path(self):
        TEST_PATH = '/test/path'
        extras = {
            'extra__google_cloud_platform__key_path': TEST_PATH,
        }
        self.gke_op._set_env_from_extras(extras)
        # Fix: assertEquals -> assertEqual (deprecated alias).
        self.assertEqual(os.environ[GAC_ENV_VAR], TEST_PATH)

    def test_get_field(self):
        FIELD_NAME = 'test_field'
        FIELD_VALUE = 'test_field_value'
        extras = {
            'extra__google_cloud_platform__{}'.format(FIELD_NAME): FIELD_VALUE
        }
        ret_val = self.gke_op._get_field(extras, FIELD_NAME)
        self.assertEqual(FIELD_VALUE, ret_val)

    @mock.patch('airflow.contrib.operators.gcp_container_operator.GKEPodOperator.log')
    def test_get_field_fail(self, log_mock):
        log_mock.info = mock.Mock()
        LOG_STR = 'Field {} not found in extras.'
        FIELD_NAME = 'test_field'
        FIELD_VALUE = 'test_field_value'
        extras = {}
        ret_val = self.gke_op._get_field(extras, FIELD_NAME,
                                         default=FIELD_VALUE)
        # Assert default is returned upon failure
        self.assertEqual(FIELD_VALUE, ret_val)
        log_mock.info.assert_called_with(LOG_STR.format(FIELD_NAME))
class GKEPodOperatorTest(unittest.TestCase):
    """Tests for GKEPodOperator: template fields, kubeconfig/credential
    setup performed by execute(), and the _set_env_from_extras /
    _get_field helpers."""

    def setUp(self):
        self.gke_op = GKEPodOperator(project_id=TEST_GCP_PROJECT_ID,
                                     location=PROJECT_LOCATION,
                                     cluster_name=CLUSTER_NAME,
                                     task_id=PROJECT_TASK_ID,
                                     name=TASK_NAME,
                                     namespace=NAMESPACE,
                                     image=IMAGE)
        # Tests assert on GAC_ENV_VAR; make sure it is unset to begin with.
        if GAC_ENV_VAR in os.environ:
            del os.environ[GAC_ENV_VAR]

    def test_template_fields(self):
        # Every template field of the base operator must stay templated
        # on the GKE subclass.
        self.assertTrue(set(KubernetesPodOperator.template_fields).issubset(
            GKEPodOperator.template_fields))

    @mock.patch(
        'airflow.contrib.operators.kubernetes_pod_operator.KubernetesPodOperator.execute')
    @mock.patch('tempfile.NamedTemporaryFile')
    @mock.patch("subprocess.check_call")
    def test_execute_conn_id_none(self, proc_mock, file_mock, exec_mock):
        """With no connection id, execute() still writes a kubeconfig and
        shells out to gcloud for cluster credentials."""
        self.gke_op.gcp_conn_id = None
        file_mock.return_value.__enter__.return_value.name = FILE_NAME

        self.gke_op.execute(None)

        # Assert Environment Variable is being set correctly
        self.assertIn(KUBE_ENV_VAR, os.environ)
        self.assertEqual(os.environ[KUBE_ENV_VAR], FILE_NAME)

        # Assert the gcloud command being called correctly
        proc_mock.assert_called_with(
            GCLOUD_COMMAND.format(CLUSTER_NAME, PROJECT_LOCATION,
                                  TEST_GCP_PROJECT_ID).split())
        self.assertEqual(self.gke_op.config_file, FILE_NAME)

    @mock.patch('airflow.hooks.base_hook.BaseHook.get_connection')
    @mock.patch(
        'airflow.contrib.operators.kubernetes_pod_operator.KubernetesPodOperator.execute')
    @mock.patch('tempfile.NamedTemporaryFile')
    @mock.patch("subprocess.check_call")
    @mock.patch.dict(os.environ, {})
    def test_execute_conn_id_path(self, proc_mock, file_mock, exec_mock,
                                  get_con_mock):
        """A connection with a key_path extra exports that path directly."""
        # gcp_conn_id is defaulted to `google_cloud_default`
        FILE_PATH = '/path/to/file'
        KEYFILE_DICT = {"extra__google_cloud_platform__key_path": FILE_PATH}
        get_con_mock.return_value.extra_dejson = KEYFILE_DICT
        file_mock.return_value.__enter__.return_value.name = FILE_NAME

        self.gke_op.execute(None)

        # Assert Environment Variable is being set correctly
        self.assertIn(KUBE_ENV_VAR, os.environ)
        self.assertEqual(os.environ[KUBE_ENV_VAR], FILE_NAME)
        self.assertIn(GAC_ENV_VAR, os.environ)
        # since we passed in keyfile_path we should get a file
        self.assertEqual(os.environ[GAC_ENV_VAR], FILE_PATH)

        # Assert the gcloud command being called correctly
        proc_mock.assert_called_with(
            GCLOUD_COMMAND.format(CLUSTER_NAME, PROJECT_LOCATION,
                                  TEST_GCP_PROJECT_ID).split())
        self.assertEqual(self.gke_op.config_file, FILE_NAME)

    @mock.patch.dict(os.environ, {})
    @mock.patch('airflow.hooks.base_hook.BaseHook.get_connection')
    @mock.patch(
        'airflow.contrib.operators.kubernetes_pod_operator.KubernetesPodOperator.execute')
    @mock.patch('tempfile.NamedTemporaryFile')
    @mock.patch("subprocess.check_call")
    def test_execute_conn_id_dict(self, proc_mock, file_mock, exec_mock,
                                  get_con_mock):
        """A keyfile_dict extra is written to a temp file whose path is
        exported as the credentials variable."""
        # gcp_conn_id is defaulted to `google_cloud_default`
        FILE_PATH = '/path/to/file'

        # This is used in the _set_env_from_extras method
        file_mock.return_value.name = FILE_PATH

        # This is used in the execute method
        file_mock.return_value.__enter__.return_value.name = FILE_NAME

        KEYFILE_DICT = {"extra__google_cloud_platform__keyfile_dict":
                        '{"private_key": "r4nd0m_k3y"}'}
        get_con_mock.return_value.extra_dejson = KEYFILE_DICT

        self.gke_op.execute(None)

        # Assert Environment Variable is being set correctly
        self.assertIn(KUBE_ENV_VAR, os.environ)
        self.assertEqual(os.environ[KUBE_ENV_VAR], FILE_NAME)
        self.assertIn(GAC_ENV_VAR, os.environ)
        # since we passed in keyfile_path we should get a file
        self.assertEqual(os.environ[GAC_ENV_VAR], FILE_PATH)

        # Assert the gcloud command being called correctly
        proc_mock.assert_called_with(
            GCLOUD_COMMAND.format(CLUSTER_NAME, PROJECT_LOCATION,
                                  TEST_GCP_PROJECT_ID).split())
        self.assertEqual(self.gke_op.config_file, FILE_NAME)

    @mock.patch.dict(os.environ, {})
    def test_set_env_from_extras_none(self):
        extras = {}
        self.gke_op._set_env_from_extras(extras)
        # _set_env_from_extras should not edit os.environ if extras does not
        # specify
        self.assertNotIn(GAC_ENV_VAR, os.environ)

    @mock.patch.dict(os.environ, {})
    @mock.patch('tempfile.NamedTemporaryFile')
    def test_set_env_from_extras_dict(self, file_mock):
        """A keyfile_dict extra is written to a (mocked) temp file and the
        file's path exported in GAC_ENV_VAR."""
        file_mock.return_value.name = FILE_NAME

        KEYFILE_DICT_STR = '{ \"test\": \"cluster\" }'
        extras = {
            'extra__google_cloud_platform__keyfile_dict': KEYFILE_DICT_STR,
        }

        self.gke_op._set_env_from_extras(extras)
        self.assertEqual(os.environ[GAC_ENV_VAR], FILE_NAME)
        # The raw JSON string must be what gets written to the temp file.
        file_mock.return_value.write.assert_called_once_with(KEYFILE_DICT_STR)

    @mock.patch.dict(os.environ, {})
    def test_set_env_from_extras_path(self):
        """A key_path extra is exported verbatim in GAC_ENV_VAR."""
        TEST_PATH = '/test/path'
        extras = {
            'extra__google_cloud_platform__key_path': TEST_PATH,
        }
        self.gke_op._set_env_from_extras(extras)
        self.assertEqual(os.environ[GAC_ENV_VAR], TEST_PATH)

    def test_get_field(self):
        """_get_field resolves the prefixed extras key."""
        FIELD_NAME = 'test_field'
        FIELD_VALUE = 'test_field_value'
        extras = {
            'extra__google_cloud_platform__{}'.format(FIELD_NAME): FIELD_VALUE
        }
        ret_val = self.gke_op._get_field(extras, FIELD_NAME)
        self.assertEqual(FIELD_VALUE, ret_val)

    @mock.patch('airflow.contrib.operators.gcp_container_operator.GKEPodOperator.log')
    def test_get_field_fail(self, log_mock):
        """Missing fields fall back to the default and are logged."""
        log_mock.info = mock.Mock()
        LOG_STR = 'Field %s not found in extras.'
        FIELD_NAME = 'test_field'
        FIELD_VALUE = 'test_field_value'
        extras = {}
        ret_val = self.gke_op._get_field(extras, FIELD_NAME,
                                         default=FIELD_VALUE)
        # Assert default is returned upon failure
        self.assertEqual(FIELD_VALUE, ret_val)
        # Logged lazily with %-style args, so the mock must see both pieces.
        log_mock.info.assert_called_with(LOG_STR, FIELD_NAME)
# Example DAG demonstrating xcom behavior of GKEPodOperator.
with models.DAG(
    "example_gcp_gke",
    schedule_interval=None,  # Override to match your needs
    start_date=days_ago(1),
    tags=['example'],
) as dag:
    # [START howto_operator_gke_start_pod_xcom]
    # Writes a JSON result to the xcom sidecar path so Airflow can pick it up.
    pod_task_xcom = GKEPodOperator(
        task_id="pod_task_xcom",
        project_id=GCP_PROJECT_ID,
        location=GCP_LOCATION,
        cluster_name=CLUSTER_NAME,
        do_xcom_push=True,
        namespace="default",
        image="alpine",
        cmds=[
            "sh", "-c",
            'mkdir -p /airflow/xcom/;echo \'[1,2,3,4]\' > /airflow/xcom/return.json'
        ],
        name="test-pod-xcom",
    )
    # [END howto_operator_gke_start_pod_xcom]

    # Due to bug this will print Nothing, xcom does not work with this opeator
    # https://stackoverflow.com/questions/58349627/airflow-gkepodoperator-xcom-push-returns-none/59285712#59285712
    # [START howto_operator_gke_xcom_result]
    pod_task_xcom_result = BashOperator(
        bash_command="echo \"{{ task_instance.xcom_pull('pod_task_xcom')[0] }}\"",
        # NOTE(review): snippet appears truncated here — the BashOperator call
        # is missing at least its task_id and closing parenthesis; confirm
        # against the original example file before reuse.
import os
import datetime

from airflow import DAG
from airflow.contrib.operators.gcp_container_operator import GKEPodOperator
from airflow.utils.dates import days_ago

# Shared task defaults; failure notifications go to the address configured in
# the GOOGLE_FAILURE_EMAIL environment variable.
default_args = {
    'owner': 'Airflow',
    'depends_on_past': False,
    'email': os.environ['GOOGLE_FAILURE_EMAIL'],
    'start_date': days_ago(0),
    'email_on_failure': True,
}

# Cron "* * * * *": schedule a run every minute.
dag = DAG(
    dag_id='games_docker',
    default_args=default_args,
    schedule_interval="* * * * *",
)

# Launch the containerized sklearn pipeline as a pod on the GKE cluster
# identified by the GOOGLE_GKE_* environment variables.
t1 = GKEPodOperator(
    task_id='sklearn_pipeline',
    project_id=os.environ['GOOGLE_PROJECT_ID'],
    cluster_name=os.environ['GOOGLE_GKE_CLUSTER_NAME'],
    name='sklearn-pipeline',
    namespace='default',
    location=os.environ['GOOGLE_GKE_CLUSTER_LOCATION'],
    image=f"us.gcr.io/{os.environ['GOOGLE_PROJECT_ID']}/sklearn_pipeline",
    dag=dag,
)