Example #1
 def test_driver_logging_error(self, mock_log_call, warn_log_call,
                               mock_get_namespaced_crd, mock_kube_conn):
     sensor = SparkKubernetesSensor(
         application_name="spark_pi",
         attach_log=True,
         dag=self.dag,
         task_id="test_task_id",
     )
     sensor.poke(None)
     warn_log_call.assert_called_once()
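The test methods in these examples appear without the test class, fixtures, and stacked @patch decorators that supply the mock_* arguments. A minimal sketch of the scaffolding they assume follows; the patch targets and fixture values are plausible reconstructions, not verbatim source.

import json
from unittest import TestCase
from unittest.mock import patch

from airflow import DAG
from airflow.models import Connection
from airflow.utils import db, timezone

# Placeholder fixtures; the real tests define full SparkApplication CRD dicts.
TEST_POD_LOG_RESULT = "example pod log line"
TEST_PENDING_RERUN_APPLICATION = {"status": {"applicationState": {"state": "PENDING_RERUN"}}}

# The class-level patch supplies the last mock argument of every test method;
# per-method patches (innermost first) supply the earlier ones.
@patch("airflow.providers.cncf.kubernetes.hooks.kubernetes.KubernetesHook.get_conn")
class TestSparkKubernetesSensor(TestCase):
    def setUp(self):
        db.merge_conn(Connection(conn_id="kubernetes_default", conn_type="kubernetes"))
        self.dag = DAG("test_dag_id", default_args={"start_date": timezone.datetime(2020, 1, 1)})

    @patch(
        "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object",
        return_value=TEST_PENDING_RERUN_APPLICATION,
    )
    def test_pending_rerun_application(self, mock_get_namespaced_crd, mock_kubernetes_hook):
        ...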
Example #2
 def test_pending_rerun_application(self, mock_get_namespaced_crd, mock_kubernetes_hook):
     sensor = SparkKubernetesSensor(application_name="spark_pi", dag=self.dag, task_id="test_task_id")
     self.assertFalse(sensor.poke(None))
     mock_kubernetes_hook.assert_called_once_with()
     mock_get_namespaced_crd.assert_called_once_with(
         group="sparkoperator.k8s.io",
         name="spark_pi",
         namespace="default",
         plural="sparkapplications",
         version="v1beta2",
     )
Example #3
 def test_unknown_application(self, mock_get_namespaced_crd, mock_kubernetes_hook):
     sensor = SparkKubernetesSensor(application_name="spark_pi", dag=self.dag, task_id="test_task_id")
     with pytest.raises(AirflowException):
         sensor.poke(None)
     mock_kubernetes_hook.assert_called_once_with()
     mock_get_namespaced_crd.assert_called_once_with(
         group="sparkoperator.k8s.io",
         name="spark_pi",
         namespace="default",
         plural="sparkapplications",
         version="v1beta2",
     )
Example #4
 def test_driver_logging_completed(self, mock_log_call, info_log_call,
                                   mock_get_namespaced_crd, mock_kube_conn):
     sensor = SparkKubernetesSensor(
         application_name="spark_pi",
         attach_log=True,
         dag=self.dag,
         task_id="test_task_id",
     )
     sensor.poke(None)
     mock_log_call.assert_called_once_with("spark_pi-driver")
     # mock_calls[1] is the second info() call; its first positional arg is the log text
     log_info_call = info_log_call.mock_calls[1]
     log_value = log_info_call[1][0]
     self.assertEqual(log_value, TEST_POD_LOG_RESULT)
Example #5
 def test_pending_rerun_application(self, mock_get_namespaced_crd,
                                    mock_kubernetes_hook):
     sensor = SparkKubernetesSensor(application_name='spark_pi',
                                    dag=self.dag,
                                    task_id='test_task_id')
     self.assertFalse(sensor.poke(None))
     mock_kubernetes_hook.assert_called_once_with()
     mock_get_namespaced_crd.assert_called_once_with(
         group='sparkoperator.k8s.io',
         name='spark_pi',
         namespace='default',
         plural='sparkapplications',
         version='v1beta2')
Example #6
 def test_driver_logging_failure(self, mock_log_call, error_log_call,
                                 mock_get_namespaced_crd, mock_kube_conn):
     sensor = SparkKubernetesSensor(
         application_name="spark_pi",
         attach_log=True,
         dag=self.dag,
         task_id="test_task_id",
     )
     with pytest.raises(AirflowException):
         sensor.poke(None)
     mock_log_call.assert_called_once_with("spark-pi-driver",
                                           namespace="default")
     error_log_call.assert_called_once_with(TEST_POD_LOG_RESULT)
Example #7
 def test_namespace_from_connection(self, mock_get_namespaced_crd,
                                    mock_kubernetes_hook):
     sensor = SparkKubernetesSensor(
         application_name='spark_pi',
         dag=self.dag,
         kubernetes_conn_id='kubernetes_with_namespace',
         task_id='test_task_id')
     sensor.poke(None)
     mock_kubernetes_hook.assert_called_once_with()
     mock_get_namespaced_crd.assert_called_once_with(
         group='sparkoperator.k8s.io',
         name='spark_pi',
         namespace='mock_namespace',
         plural='sparkapplications',
         version='v1beta2')
Example #8
 def test_namespace_from_connection(self, mock_get_namespaced_crd, mock_kubernetes_hook):
     sensor = SparkKubernetesSensor(
         application_name="spark_pi",
         dag=self.dag,
         kubernetes_conn_id="kubernetes_with_namespace",
         task_id="test_task_id",
     )
     sensor.poke(None)
     mock_kubernetes_hook.assert_called_once_with()
     mock_get_namespaced_crd.assert_called_once_with(
         group="sparkoperator.k8s.io",
         name="spark_pi",
         namespace="mock_namespace",
         plural="sparkapplications",
         version="v1beta2",
     )
Example #9
 def test_api_group_and_version_from_sensor(self, mock_get_namespaced_crd,
                                            mock_kubernetes_hook):
     api_group = "sparkoperator.example.com"
     api_version = "v1alpha1"
     sensor = SparkKubernetesSensor(
         application_name="spark_pi",
         dag=self.dag,
         kubernetes_conn_id="kubernetes_with_namespace",
         task_id="test_task_id",
         api_group=api_group,
         api_version=api_version,
     )
     sensor.poke(None)
     mock_kubernetes_hook.assert_called_once_with()
     mock_get_namespaced_crd.assert_called_once_with(
         group=api_group,
         name="spark_pi",
         namespace="mock_namespace",
         plural="sparkapplications",
         version=api_version,
     )
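The assertions above spell out the raw Kubernetes API call the sensor issues. For reference, the equivalent direct call with the official kubernetes client looks like this (a sketch assuming a reachable cluster that already has a SparkApplication named spark_pi):

from kubernetes import client, config

config.load_kube_config()  # or config.load_incluster_config() inside a pod
api = client.CustomObjectsApi()
app = api.get_namespaced_custom_object(
    group="sparkoperator.k8s.io",  # or a custom api_group such as sparkoperator.example.com
    version="v1beta2",             # or a custom api_version such as v1alpha1
    namespace="default",
    plural="sparkapplications",
    name="spark_pi",
)
print(app["status"]["applicationState"]["state"])  # e.g. RUNNING, COMPLETED, FAILED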
Example #10
 def test_unknown_application(self, mock_get_namespaced_crd,
                              mock_kubernetes_hook):
     sensor = SparkKubernetesSensor(application_name='spark_pi',
                                    dag=self.dag,
                                    task_id='test_task_id')
     self.assertRaises(AirflowException, sensor.poke, None)
     mock_kubernetes_hook.assert_called_once_with()
     mock_get_namespaced_crd.assert_called_once_with(
         group='sparkoperator.k8s.io',
         name='spark_pi',
         namespace='default',
         plural='sparkapplications',
         version='v1beta2')
Example #11
}
# [END default_args]

# [START instantiate_dag]

dag = DAG('spark_pi',
          default_args=default_args,
          description='submit spark-pi as sparkApplication on kubernetes',
          schedule_interval=None,
          start_date=days_ago(1),
          user_defined_macros={'json': json})

t1 = SparkKubernetesOperator(
    task_id='spark_pi_submit',
    namespace="spark-work",
    application_file="example_spark_kubernetes_spark_pi.yaml",
    kubernetes_conn_id="kubernetes_default",
    do_xcom_push=True,
    dag=dag,
)

t2 = SparkKubernetesSensor(
    task_id='spark_pi_monitor',
    namespace="spark-work",
    application_name="{{ task_instance.xcom_pull(task_ids='spark_pi_submit')['metadata']['name'] }}",
    kubernetes_conn_id="kubernetes_default",
    attach_log=True,
    dag=dag,
)
t1 >> t2
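The templated application_name works because the submit task, with do_xcom_push=True, pushes the SparkApplication object returned by the Kubernetes API to XCom. A standalone sketch of how that Jinja expression resolves (FakeTaskInstance and the metadata values are hypothetical):

from jinja2 import Template

class FakeTaskInstance:
    def xcom_pull(self, task_ids):
        # Stand-in for the XCom payload pushed by the spark_pi_submit task
        return {"metadata": {"name": "spark-pi-2021-01-01", "namespace": "spark-work"}}

rendered = Template(
    "{{ task_instance.xcom_pull(task_ids='spark_pi_submit')['metadata']['name'] }}"
).render(task_instance=FakeTaskInstance())
assert rendered == "spark-pi-2021-01-01"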
Example #12
    'retry_delay': timedelta(minutes=10)
}

with DAG('minio-fifa-spark-operator',
         default_args=default_args,
         schedule_interval='@daily',
         tags=['development', 's3', 'minio', 'spark-operator']) as dag:

    etl_fifa_spark_operator = SparkKubernetesOperator(
        task_id='etl_fifa_spark_operator',
        namespace='processing',
        application_file='etl-fifa.yaml',
        kubernetes_conn_id='minikube',
        do_xcom_push=True)

    monitor_spark_app_status = SparkKubernetesSensor(
        task_id='monitor_spark_app_status',
        namespace="processing",
        application_name="{{ task_instance.xcom_pull(task_ids='etl_fifa_spark_operator')['metadata']['name'] }}",
        kubernetes_conn_id="minikube")

    delete_s3_file_raw_zone = S3DeleteObjectsOperator(
        task_id='delete_s3_file_raw_zone',
        bucket=RAW_ZONE,
        keys='data.csv',
        aws_conn_id='minio',
        do_xcom_push=True)

    etl_fifa_spark_operator >> monitor_spark_app_status >> delete_s3_file_raw_zone
Example #13
default_args = {
    "owner": "Airflow",
    "start_date": airflow.utils.dates.days_ago(1),
    "depends_on_past": False,
    "email_on_failure": False,
    "email_on_retry": False,
    "email": "*****@*****.**",
    "retries": 1,
    "retry_delay": timedelta(minutes=1),
}

with DAG(dag_id="ddt-spark-k8s-operator", schedule_interval="@hourly", default_args=default_args, catchup=False) as dag:
    t1 = SparkKubernetesOperator(
        task_id='stage_1_submit',
        namespace="ddt-compute",
        application_file="SparkApplication_stage_1.yaml",
        kubernetes_conn_id="kubernetes_default",
        do_xcom_push=True
    )
    t2 = SparkKubernetesSensor(
        task_id='stage_1_monitor',
        namespace="ddt-compute",
        application_name="{{ task_instance.xcom_pull(task_ids='stage_1_submit')['metadata']['name'] }}",
        kubernetes_conn_id="kubernetes_default",

    )
    t1 >> t2
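The stage_1 naming suggests later stages follow the same submit-then-monitor pattern; a follow-on pair would chain like this (SparkApplication_stage_2.yaml is a hypothetical file name):

    t3 = SparkKubernetesOperator(
        task_id='stage_2_submit',
        namespace="ddt-compute",
        application_file="SparkApplication_stage_2.yaml",
        kubernetes_conn_id="kubernetes_default",
        do_xcom_push=True,
    )
    t4 = SparkKubernetesSensor(
        task_id='stage_2_monitor',
        namespace="ddt-compute",
        application_name="{{ task_instance.xcom_pull(task_ids='stage_2_submit')['metadata']['name'] }}",
        kubernetes_conn_id="kubernetes_default",
    )
    t1 >> t2 >> t3 >> t4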



# view the Spark operator logs
Example #14
# [END default_args]

# [START instantiate_dag]

dag = DAG('spark_pi',
          default_args=default_args,
          schedule_interval=timedelta(days=1),
          tags=['example'])

# spark = open(
#     "example_spark_kubernetes_operator_pi.yaml").read()

submit = SparkKubernetesOperator(
    task_id='spark_pi_submit',
    namespace="mycspace",
    application_file="example_spark_kubernetes_operator_pi.yaml",
    kubernetes_conn_id="kubernetes_in_cluster",
    do_xcom_push=True,
    dag=dag,
)

sensor = SparkKubernetesSensor(
    task_id='spark_pi_monitor',
    namespace="mycspace",
    application_name="{{ task_instance.xcom_pull(task_ids='spark_pi_submit')['metadata']['name'] }}",
    kubernetes_conn_id="kubernetes_in_cluster",
    dag=dag)

submit >> sensor
    name="download_txtfile",
    namespace="airflow",
    image="cirrusci/wget",
    cmds=[
        "/bin/sh", "-c",
        "mkdir -p /mnt1/data &&  mkdir -p /mnt1/results && wget https://norvig.com/big.txt -O /mnt1/data/big.txt"
    ],
    task_id="download_txtfile",
    resources=compute_resources,
    volumes=[volume],
    volume_mounts=[volume_mount],
    get_logs=True,
    dag=dag)

spark_task = SparkKubernetesOperator(
    task_id="spark-wordcount",
    namespace="airflow",
    application_file="spark-wordcount.yaml",
    kubernetes_conn_id="kubernetes_default",
    dag=dag,
)

spark_sensor = SparkKubernetesSensor(task_id="spark-wordcount-monitor",
                                     namespace="airflow",
                                     application_name=spark_application_name,
                                     attach_log=True,
                                     kubernetes_conn_id="kubernetes_default",
                                     dag=dag)

download_txtfile >> spark_task >> spark_sensor
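This snippet arrived without the definitions it references (compute_resources, volume, volume_mount, spark_application_name). Plausible stand-ins are sketched below; the names match the snippet, but every value is an assumption, and depending on the provider version KubernetesPodOperator may expect resources as a plain dict rather than a k8s model:

from kubernetes.client import models as k8s

compute_resources = k8s.V1ResourceRequirements(
    requests={"cpu": "500m", "memory": "512Mi"},
    limits={"cpu": "1", "memory": "1Gi"},
)
volume = k8s.V1Volume(
    name="data-volume",
    persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(claim_name="data-pvc"),
)
volume_mount = k8s.V1VolumeMount(name="data-volume", mount_path="/mnt1")
spark_application_name = "spark-wordcount"  # should match metadata.name in spark-wordcount.yaml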
Example #16
dag = DAG(
    'spark_pi',
    default_args=default_args,
    schedule_interval=timedelta(days=1),
    tags=['example']
)

# spark = open(
#     "example_spark_kubernetes_operator_pi.yaml").read()

submit = SparkKubernetesOperator(
    task_id='spark_pi_submit',
    namespace="sampletenant",
    application_file="example_spark_kubernetes_operator_pi.yaml",
    kubernetes_conn_id="kubernetes_in_cluster",
    do_xcom_push=True,
    dag=dag,
    api_group="sparkoperator.hpe.com"
)

sensor = SparkKubernetesSensor(
    task_id='spark_pi_monitor',
    namespace="sampletenant",
    application_name="{{ task_instance.xcom_pull(task_ids='spark_pi_submit')['metadata']['name'] }}",
    kubernetes_conn_id="kubernetes_in_cluster",
    dag=dag,
    api_group="sparkoperator.hpe.com",
    attach_log=True
)

submit >> sensor
Example #17
# [END import_module]

# [START instantiate_dag]

dag = DAG(
    'spark_pi',
    default_args={'max_active_runs': 1},
    description='submit spark-pi as sparkApplication on kubernetes',
    schedule_interval=timedelta(days=1),
    start_date=datetime(2021, 1, 1),
    catchup=False,
)

t1 = SparkKubernetesOperator(
    task_id='spark_pi_submit',
    namespace="default",
    application_file="example_spark_kubernetes_spark_pi.yaml",
    do_xcom_push=True,
    dag=dag,
)

t2 = SparkKubernetesSensor(
    task_id='spark_pi_monitor',
    namespace="default",
    application_name="{{ task_instance.xcom_pull(task_ids='spark_pi_submit')['metadata']['name'] }}",
    dag=dag,
)
t1 >> t2