def test_driver_logging_error(self, mock_log_call, warn_log_call, mock_get_namespaced_crd, mock_kube_conn):
    """A failure while fetching driver logs must only emit a warning, not abort the poke."""
    task = SparkKubernetesSensor(
        task_id="test_task_id",
        dag=self.dag,
        application_name="spark_pi",
        attach_log=True,
    )
    task.poke(None)
    # Log retrieval problems are non-fatal: the sensor warns once and moves on.
    warn_log_call.assert_called_once()
def test_pending_rerun_application(self, mock_get_namespaced_crd, mock_kubernetes_hook):
    """A PENDING_RERUN application is non-terminal, so poke() must return False."""
    task = SparkKubernetesSensor(task_id="test_task_id", dag=self.dag, application_name="spark_pi")
    self.assertFalse(task.poke(None))
    mock_kubernetes_hook.assert_called_once_with()
    # The sensor must look up the SparkApplication CRD exactly once, with the defaults.
    mock_get_namespaced_crd.assert_called_once_with(
        group="sparkoperator.k8s.io",
        name="spark_pi",
        namespace="default",
        plural="sparkapplications",
        version="v1beta2",
    )
def test_unknown_application(self, mock_get_namespaced_crd, mock_kubernetes_hook):
    """An unrecognized application state must make poke() raise AirflowException."""
    task = SparkKubernetesSensor(task_id="test_task_id", dag=self.dag, application_name="spark_pi")
    with pytest.raises(AirflowException):
        task.poke(None)
    mock_kubernetes_hook.assert_called_once_with()
    # Even on failure the CRD must have been queried exactly once with the defaults.
    mock_get_namespaced_crd.assert_called_once_with(
        group="sparkoperator.k8s.io",
        name="spark_pi",
        namespace="default",
        plural="sparkapplications",
        version="v1beta2",
    )
def test_driver_logging_completed(self, mock_log_call, info_log_call, mock_get_namespaced_crd, mock_kube_conn):
    """With attach_log=True, a completed application's driver pod log is fetched and INFO-logged."""
    task = SparkKubernetesSensor(
        task_id="test_task_id",
        dag=self.dag,
        application_name="spark_pi",
        attach_log=True,
    )
    task.poke(None)
    mock_log_call.assert_called_once_with("spark_pi-driver")
    # The pod log payload is the positional argument of the second INFO call.
    second_info_call = info_log_call.mock_calls[1]
    logged_text = second_info_call[1][0]
    self.assertEqual(logged_text, TEST_POD_LOG_RESULT)
def test_pending_rerun_application(self, mock_get_namespaced_crd, mock_kubernetes_hook):
    """A PENDING_RERUN application keeps the sensor poking: poke() returns False."""
    task = SparkKubernetesSensor(task_id='test_task_id', dag=self.dag, application_name='spark_pi')
    self.assertFalse(task.poke(None))
    mock_kubernetes_hook.assert_called_once_with()
    # A single CRD lookup with the default group/namespace/version is expected.
    mock_get_namespaced_crd.assert_called_once_with(
        group='sparkoperator.k8s.io',
        name='spark_pi',
        namespace='default',
        plural='sparkapplications',
        version='v1beta2',
    )
def test_driver_logging_failure(self, mock_log_call, error_log_call, mock_get_namespaced_crd, mock_kube_conn):
    """A failed application raises AirflowException and its driver pod log is ERROR-logged."""
    task = SparkKubernetesSensor(
        task_id="test_task_id",
        dag=self.dag,
        application_name="spark_pi",
        attach_log=True,
    )
    with pytest.raises(AirflowException):
        task.poke(None)
    # NOTE(review): the pod name "spark-pi-driver" (hyphen) does not match the
    # sensor's application_name "spark_pi" — presumably it originates from the
    # mocked CRD response fixture; confirm against that fixture data.
    mock_log_call.assert_called_once_with("spark-pi-driver", namespace="default")
    error_log_call.assert_called_once_with(TEST_POD_LOG_RESULT)
def test_namespace_from_connection(self, mock_get_namespaced_crd, mock_kubernetes_hook):
    """A namespace stored on the Kubernetes connection overrides the default."""
    task = SparkKubernetesSensor(
        task_id='test_task_id',
        dag=self.dag,
        kubernetes_conn_id='kubernetes_with_namespace',
        application_name='spark_pi',
    )
    task.poke(None)
    mock_kubernetes_hook.assert_called_once_with()
    # The CRD lookup must target the connection-supplied namespace.
    mock_get_namespaced_crd.assert_called_once_with(
        group='sparkoperator.k8s.io',
        name='spark_pi',
        namespace='mock_namespace',
        plural='sparkapplications',
        version='v1beta2',
    )
def test_namespace_from_connection(self, mock_get_namespaced_crd, mock_kubernetes_hook):
    """The sensor must use the namespace carried by the Kubernetes connection."""
    task = SparkKubernetesSensor(
        task_id="test_task_id",
        dag=self.dag,
        kubernetes_conn_id="kubernetes_with_namespace",
        application_name="spark_pi",
    )
    task.poke(None)
    mock_kubernetes_hook.assert_called_once_with()
    # Namespace comes from the connection, not the 'default' fallback.
    mock_get_namespaced_crd.assert_called_once_with(
        group="sparkoperator.k8s.io",
        name="spark_pi",
        namespace="mock_namespace",
        plural="sparkapplications",
        version="v1beta2",
    )
def test_api_group_and_version_from_sensor(self, mock_get_namespaced_crd, mock_kubernetes_hook):
    """Custom api_group/api_version passed to the sensor must reach the CRD query."""
    custom_group = "sparkoperator.example.com"
    custom_version = "v1alpha1"
    task = SparkKubernetesSensor(
        task_id="test_task_id",
        dag=self.dag,
        kubernetes_conn_id="kubernetes_with_namespace",
        application_name="spark_pi",
        api_group=custom_group,
        api_version=custom_version,
    )
    task.poke(None)
    mock_kubernetes_hook.assert_called_once_with()
    # The overridden group/version replace the sparkoperator.k8s.io defaults.
    mock_get_namespaced_crd.assert_called_once_with(
        group=custom_group,
        name="spark_pi",
        namespace="mock_namespace",
        plural="sparkapplications",
        version=custom_version,
    )
def test_unknown_application(self, mock_get_namespaced_crd, mock_kubernetes_hook):
    """An unknown application state should make poke() raise AirflowException."""
    task = SparkKubernetesSensor(task_id='test_task_id', dag=self.dag, application_name='spark_pi')
    self.assertRaises(AirflowException, task.poke, None)
    mock_kubernetes_hook.assert_called_once_with()
    # The failing poke still performs exactly one CRD lookup with the defaults.
    mock_get_namespaced_crd.assert_called_once_with(
        group='sparkoperator.k8s.io',
        name='spark_pi',
        namespace='default',
        plural='sparkapplications',
        version='v1beta2',
    )
} # [END default_args] # [START instantiate_dag] dag = DAG('spark_pi', default_args=default_args, description='submit spark-pi as sparkApplication on kubernetes', schedule_interval=None, start_date=days_ago(1), user_defined_macros={'json': json}) t1 = SparkKubernetesOperator( task_id='spark_pi_submit', namespace="spark-work", application_file="example_spark_kubernetes_spark_pi.yaml", kubernetes_conn_id="kubernetes_default", do_xcom_push=True, dag=dag, ) t2 = SparkKubernetesSensor( task_id='spark_pi_monitor', namespace="spark-work", application_name= "{{ task_instance.xcom_pull(task_ids='spark_pi_submit')['metadata']['name'] }}", kubernetes_conn_id="kubernetes_default", attach_log=True, dag=dag, ) t1 >> t2
'retry_delay': timedelta(minutes=10) } with DAG('minio-fifa-spark-operator', default_args=default_args, schedule_interval='@daily', tags=['development', 's3', 'minio', 'spark-operator']) as dag: etl_fifa_spark_operator = SparkKubernetesOperator( task_id='etl_fifa_spark_operator', namespace='processing', application_file='etl-fifa.yaml', kubernetes_conn_id='minikube', do_xcom_push=True) monitor_spark_app_status = SparkKubernetesSensor( task_id='monitor_spark_app_status', namespace="processing", application_name= "{{ task_instance.xcom_pull(task_ids='etl_fifa_spark_operator')['metadata']['name'] }}", kubernetes_conn_id="minikube") delete_s3_file_raw_zone = S3DeleteObjectsOperator( task_id='delete_s3_file_raw_zone', bucket=RAW_ZONE, keys='data.csv', aws_conn_id='minio', do_xcom_push=True) etl_fifa_spark_operator >> monitor_spark_app_status >> delete_s3_file_raw_zone
default_args = {
    "owner": "Airflow",
    "start_date": airflow.utils.dates.days_ago(1),
    "depends_on_past": False,
    "email_on_failure": False,
    "email_on_retry": False,
    "email": "*****@*****.**",
    "retries": 1,
    "retry_delay": timedelta(minutes=1),
}

with DAG(
    dag_id="ddt-spark-k8s-operator",
    schedule_interval="@hourly",
    default_args=default_args,
    catchup=False,
) as dag:
    # Submit the stage-1 SparkApplication; xcom-push the created resource.
    t1 = SparkKubernetesOperator(
        task_id='stage_1_submit',
        namespace="ddt-compute",
        application_file="SparkApplication_stage_1.yaml",
        kubernetes_conn_id="kubernetes_default",
        do_xcom_push=True,
    )

    # Poll the submitted application until it terminates.
    t2 = SparkKubernetesSensor(
        task_id='stage_1_monitor',
        namespace="ddt-compute",
        application_name="{{ task_instance.xcom_pull(task_ids='stage_1_submit')['metadata']['name'] }}",
        kubernetes_conn_id="kubernetes_default",
    )

    t1 >> t2

# TODO: inspect the Spark operator logs (translated from the original Russian note).
# [END default_args]

# [START instantiate_dag]
dag = DAG(
    'spark_pi',
    default_args=default_args,
    schedule_interval=timedelta(days=1),
    tags=['example'],
)

# Submit the SparkApplication defined in the YAML manifest; xcom-push the
# API response so the sensor can resolve the application name.
submit = SparkKubernetesOperator(
    task_id='spark_pi_submit',
    namespace="mycspace",
    application_file="example_spark_kubernetes_operator_pi.yaml",
    kubernetes_conn_id="kubernetes_in_cluster",
    do_xcom_push=True,
    dag=dag,
)

# Wait for the submitted application to finish.
sensor = SparkKubernetesSensor(
    task_id='spark_pi_monitor',
    namespace="mycspace",
    application_name="{{ task_instance.xcom_pull(task_ids='spark_pi_submit')['metadata']['name'] }}",
    kubernetes_conn_id="kubernetes_in_cluster",
    dag=dag,
)

submit >> sensor
name="download_txtfile", namespace="airflow", image="cirrusci/wget", cmds=[ "/bin/sh", "-c", "mkdir -p /mnt1/data && mkdir -p /mnt1/results && wget https://norvig.com/big.txt -O /mnt1/data/big.txt" ], task_id="download_txtfile", resources=compute_resources, volumes=[volume], volume_mounts=[volume_mount], get_logs=True, dag=dag) spark_task = SparkKubernetesOperator( task_id="spark-wordcount", namespace="airflow", application_file="spark-wordcount.yaml", kubernetes_conn_id="kubernetes_default", dag=dag, ) spark_sensor = SparkKubernetesSensor(task_id="spark-wordcount-monitor", namespace="airflow", application_name=spark_application_name, attach_log=True, kubernetes_conn_id="kubernetes_default", dag=dag) download_txtfile >> spark_task >> spark_sensor
'spark_pi', default_args=default_args, schedule_interval=timedelta(days=1), tags=['example'] ) # spark = open( # "example_spark_kubernetes_operator_pi.yaml").read() submit = SparkKubernetesOperator( task_id='spark_pi_submit', namespace="sampletenant", application_file="example_spark_kubernetes_operator_pi.yaml", kubernetes_conn_id="kubernetes_in_cluster", do_xcom_push=True, dag=dag, api_group="sparkoperator.hpe.com" ) sensor = SparkKubernetesSensor( task_id='spark_pi_monitor', namespace="sampletenant", application_name="{{ task_instance.xcom_pull(task_ids='spark_pi_submit')['metadata']['name'] }}", kubernetes_conn_id="kubernetes_in_cluster", dag=dag, api_group="sparkoperator.hpe.com", attach_log=True ) submit >> sensor
# [END import_module]

# [START instantiate_dag]
dag = DAG(
    'spark_pi',
    default_args={'max_active_runs': 1},
    description='submit spark-pi as sparkApplication on kubernetes',
    schedule_interval=timedelta(days=1),
    start_date=datetime(2021, 1, 1),
    catchup=False,
)

# Submit the SparkApplication manifest; xcom-push the API response so the
# sensor can pull the generated application name.
t1 = SparkKubernetesOperator(
    task_id='spark_pi_submit',
    namespace="default",
    application_file="example_spark_kubernetes_spark_pi.yaml",
    do_xcom_push=True,
    dag=dag,
)

# Wait for the submitted application to reach a terminal state.
t2 = SparkKubernetesSensor(
    task_id='spark_pi_monitor',
    namespace="default",
    application_name="{{ task_instance.xcom_pull(task_ids='spark_pi_submit')['metadata']['name'] }}",
    dag=dag,
)

t1 >> t2